Setup

The settings list settings configure which parts, simulations, and analysis steps of this R markdown are executed. This way single components of the analysis can be disabled for the purpose of saving computation time.

settings <- list(
  # Application of the full DANA pipeline to the benchmark data
  bench.DANA=T,
  # Application of the full DANA pipeline to the test data
  test.DANA=T,
  # Generate interactive plots of co-expression structures
  investigate.coexpression=F,
  # Comparison of coexpression estimation methods
  compare.corr.methods=T,
  # Full Pipeline comparison of two correlation methods
  compare.two.methods=F,
  # Compare DANA results for different cutoff choices
  compare.cutoffs=T,
  # Perform Differential Expression Analysis for test and benchmark data sets
  perform.DEA=T,
  # Generate paper figures
  generate.paper.figs=T,
  # Specify if paper figures are exported
  export.figures = T,
  # Path for file exports
  paper.fig.path = "../danawriteup/figs/",
  # Clears environment
  debug=TRUE
)
if(settings$generate.paper.figs) {
  settings$bench.DANA=TRUE
  settings$test.DANA=TRUE
}
if(settings$debug) rm(list=ls()[ls()!="settings"])

Parameter configuration

The config list contains all parameters for the analysis. Remember to always set the working directory and the paths to the data/external files prior to running the code.

config <- list(
  DANA.path = "./R/DANA.R",
  
  ## Data
  case.name = "MSK",
  # Read count data file for the full MSK data set
  data.file.path = "data/MSKDatasets.RData",
  # miRNA coordinates (chromosome and base pair location on chromosome)
  coords.file = "data/coords.msk.RData",
  # Restrict the analysis to a subtype 
  # Available: "all", "PMFH", and "MXF"
  data.subtype = "all",
  
  ## Normalization Methods
  # Specify which normalization methods will be applied
  norm.apply.TC = TRUE,
  norm.apply.UQ = TRUE,
  norm.apply.med = TRUE,
  norm.apply.TMM = TRUE,
  norm.apply.DESeq = TRUE,
  norm.apply.PoissonSeq = TRUE,
  norm.apply.QN = TRUE,
  norm.apply.RUV = TRUE,
  
  # thresholds for zero-expressed, poorly-expressed and well-expressed genes 
  t.zero = 2,  # zero-expressed in [0, t.zero)
  t.poor = 10,  # poorly-expressed in [t.zero, t.poor]
  t.well = 2^7,  # well-expressed in [t.well, inf)
  # distance threshold for clustering
  cluster.threshold = 10000,
  # preprocessing data transformation type: none ("n"), modified log2 ("log2"),
  # or cube root ("cube") available
  preprocess.transform = "log2",
  
  ## Correlation Estimation for positive and negative controls
  # Available methods | Tuning parameter calibration schemes
  # "mb"              | "cv", "aic", "bic", "av"
  # "huge.mb"         | "ric", "stars"
  # "glasso"          | "ric", "stars", "ebic"
  # "fastggm"         | none
  # "pearson"         | none
  # "spearman"        | none
  
  # Positive Controls
  corr.method.pos = "mb",
  tuning.criterion.pos = "bic",
  # Negative Controls
  corr.method.neg = "pearson",
  tuning.criterion.neg = "",
  # Generate plots within DANA
  generate.plots = FALSE  # generate comparison plots
)

Configuration for the differential expression analysis

config.DEA <- list(
  ## Data
  case.name = "MSK",

  ## Normalization Methods
  # Specify which normalization methods will be applied
  norm.apply.TC = TRUE,
  norm.apply.UQ = TRUE,
  norm.apply.med = TRUE,
  norm.apply.TMM = TRUE,
  norm.apply.DESeq = TRUE,
  norm.apply.PoissonSeq = TRUE,
  norm.apply.QN = TRUE,
  norm.apply.RUV = TRUE,
  
  # Significance level for DEA
  alpha = 0.01,
  # Plots
  generate.volcano.plot = TRUE,
  generate.meanvar.plot = TRUE,
  # Use q-values (adjusted p-values) instead of p-values
  q.values = FALSE,
  # RUV reduces the parameter size. Reduce DEA result to RUV genes?
  perform.subsetting = TRUE
)

Load Packages

Load all required R libraries/packages.

# DANA functions
source(config$DANA.path)
# Libraries
library(openxlsx)  # read excel files
library(corrplot) # For mixed correlation plots
library(cowplot) # Arrange plots
library(RColorBrewer) # Colors
library(latex2exp) # for latex in axis labels 
library(ggcorrplot) # ggplot2 correlation plots

Load Data

Load the dataset under study

We load the data set into memory and reduce the set one one subtype (MXF or PMFH) if necessary.

# Load the p x n matrix of read counts into the workspace
load(config$data.file.path)
# Transform gene names to lower case
rownames(benchmark) <- tolower(rownames(benchmark))
rownames(test) <- tolower(rownames(test))

# Rename
test.RC <- test 
bench.RC <- benchmark  

# Inspect
cat("Dimensions of test data: ", dim(test.RC), "\n")
## Dimensions of test data:  1033 54
cat("Dimensions of benchmark data: ", dim(bench.RC), "\n")
## Dimensions of benchmark data:  1033 54
# Set sample groups
groups <- ifelse(substr(colnames(test.RC), 1, 3)=="MXF", "MXF", "PMFH")

## Reduce to subtype
if(config$data.subtype != "all") {
  test.RC <- test.RC[, groups == config$data.subtype]
  bench.RC <- bench.RC[, groups == config$data.subtype]
  groups <- groups[groups == config$data.subtype]
}

miRNA Coordinates

We intend to map all genes in the data to their locations in the genome. Therefore, we build data frames mapping miRNAs to their coordinates on the genome. This coordinate is expressed through chromosome number and base-pair location.

The data frame coords uniquely maps each mature and star miRNA to their respective position on the genome. The name of the coords data frame entries corresponds to the mature/star name. The data frame has the following entries:

  • loc.index : Ordering index of all precursors on the genome
  • chr : The chromosome idnetifier on which the gene is located
  • start : Number of the first base pair of the miRNA on the chromosome
  • end : Number of the last base pair of the miRNA on the chromosome
  • corresponding : The name of the corresponding mature or star miRNA, respectively
load(config$coords.file)

Remove genes that cannot be found in the coordinates data frame

Some miRNAs map to multiple locations of sequence families. We named the sequence family with a parenthesis that reflects the number of members from all the different genomic locations (e.g. hsa-let-7a(3)). These miRNAs cannot be uniquely mapped to the genome, therefore we must exclude these from the location based analysis.

# only consider genes that are present in the coordinate data frame
genes.in.coords <- rownames(coords)[na.omit(match(rownames(test.RC), rownames(coords)))]
cat(dim(test.RC)[1]-length(genes.in.coords), " genes not found in coords. Reducing data set to ", length(genes.in.coords), "genes.\n")
## 72  genes not found in coords. Reducing data set to  961 genes.
# test data set
test.RC <- test.RC[genes.in.coords, ]
# benchmark data set
bench.RC <- bench.RC[genes.in.coords, ]
genes <- rownames(test.RC)
rm(genes.in.coords)

Examine the Data

First, we investigate the distribution of mean miRNA expression of the data.

# Histogram plot test data set
par(mfrow=c(1,1))
df <- data.frame(log.expression=log2(rowMeans(test.RC)+1))
print(ggplot(df, aes(x=log.expression)) + 
        geom_histogram(binwidth = .1, color="black", fill="black") +
        geom_vline(xintercept = log2(config$t.zero+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.poor+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.well+1), color="red",  linetype="dashed") +
        ggtitle("Test data set"))

rm(df)

# Histogram plot benchmark data set
par(mfrow=c(1,1))
df <- data.frame(log.expression=log2(rowMeans(bench.RC)+1))
print(ggplot(df, aes(x=log.expression)) + 
        geom_histogram(binwidth = .1, color="black", fill="black") +
        geom_vline(xintercept = log2(config$t.zero+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.poor+1), color="blue", linetype="dashed") +
        geom_vline(xintercept = log2(config$t.well+1), color="red",  linetype="dashed") +
        ggtitle("Benchmark data set"))

rm(df)

We observe that there is a large proportion of poorly-expressed genes. Some of them have extremely low or zero mean expression which corresponds to essentially zero reads.

Comparison of Coexpression Estimation Methods

if(settings$compare.corr.methods) {
  ## Preprocessing for test data set
  genes.test <- rownames(test.RC)
  
  # Apply Normalization
  test.norm <- normalize(test.RC,
                         groups=groups,
                         name="test",
                         apply.TC=T,
                         apply.UQ=T,
                         apply.med=T,
                         apply.TMM=T,
                         apply.DESeq=T,
                         apply.PoissonSeq=T,
                         apply.QN=T,
                         apply.RUV=T)
  
  # Define Controls
  pre.res <- define.controls(test.RC,
                             t.zero=config$t.zero,
                             t.poor=config$t.poor,
                             t.well=config$t.well,
                             t.cluster=config$cluster.threshold,
                             coords=coords,
                             clusters=NULL)
  pos.controls.test <- pre.res$genes.pos
  neg.controls.test <- pre.res$genes.neg
  clusters.test <- pre.res$clusters
  
  
  
  ## Estimate partial correlations and cc+ metric using DANA
  
  # mb (bic)
  cat("Estimating Models using method: mb.bic\n")
  DANA.test.mb.bic.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="mb",
                       tuning.criterion.pos="bic",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="")   
  
  # mb(aic)
  cat("Estimating Models using method: mb.aic\n")
  DANA.test.mb.aic.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="mb",
                       tuning.criterion.pos="aic",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="")   
  
  # mb (av)
  cat("Estimating Models using method: mb.av\n")
  DANA.test.mb.av.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="mb",
                       tuning.criterion.pos="av",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="")   
  
  # mb (cv)
  cat("Estimating Models using method: mb.cv\n")
  DANA.test.mb.cv.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="mb",
                       tuning.criterion.pos="cv",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # glasso (ric)
  cat("Estimating Models using method: glasso(ric)\n")
  DANA.test.glasso.ric.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="glasso",
                       tuning.criterion.pos="ric",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # glasso (stars)
  cat("Estimating Models using method: glasso(stars)\n")
  DANA.test.glasso.stars.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="glasso",
                       tuning.criterion.pos="stars",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # huge.mb (ric)
  cat("Estimating Models using method: huge.mb.ric\n")
  DANA.test.huge.mb.ric.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="huge.mb",
                       tuning.criterion.pos="ric",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # huge.mb (stars)
  cat("Estimating Models using method: huge.mb.stars\n")
  DANA.test.huge.mb.stars.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="huge.mb",
                       tuning.criterion.pos="stars",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # FastGGM
  cat("Estimating Models using method: fastggm\n")
  DANA.test.fastggm.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="fastggm",
                       tuning.criterion.pos="",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # Pearson correlations
  cat("Estimating Models using method: pearson\n")
  DANA.test.pearson.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="pearson",
                       tuning.criterion.pos="",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  # Spearman correlations
  cat("Estimating Models using method: spearman\n")
  DANA.test.spearman.res <- DANA(data.RC=test.RC, 
                       data.norm=test.norm, 
                       pos.controls=pos.controls.test, 
                       neg.controls=neg.controls.test, 
                       clusters=clusters.test,
                       coords=coords,
                       case.name="test",
                       generate.plots=F,
                       preprocess.transform="log2",
                       corr.method.pos="spearman",
                       tuning.criterion.pos="",
                       corr.method.neg="pearson",
                       tuning.criterion.neg="") 
  
  
  # Fix normalization method names (remove "test." from names)
  rownames(DANA.test.mb.bic.res$metrics) <- substr(rownames(DANA.test.mb.bic.res$metrics), 6, 20)
  rownames(DANA.test.mb.aic.res$metrics) <- substr(rownames(DANA.test.mb.aic.res$metrics), 6, 20)
  rownames(DANA.test.mb.cv.res$metrics) <- substr(rownames(DANA.test.mb.cv.res$metrics), 6, 20)
  rownames(DANA.test.mb.av.res$metrics) <- substr(rownames(DANA.test.mb.av.res$metrics), 6, 20)
  rownames(DANA.test.glasso.ric.res$metrics) <- substr(rownames(DANA.test.glasso.ric.res$metrics), 6, 20)
  rownames(DANA.test.glasso.stars.res$metrics) <- substr(rownames(DANA.test.glasso.stars.res$metrics), 6, 20)
  rownames(DANA.test.huge.mb.ric.res$metrics) <- substr(rownames(DANA.test.huge.mb.ric.res$metrics), 6, 20)
  rownames(DANA.test.huge.mb.stars.res$metrics) <- substr(rownames(DANA.test.huge.mb.stars.res$metrics), 6, 20)
  rownames(DANA.test.fastggm.res$metrics) <- substr(rownames(DANA.test.fastggm.res$metrics), 6, 20)
  rownames(DANA.test.pearson.res$metrics) <- substr(rownames(DANA.test.pearson.res$metrics), 6, 20)
  rownames(DANA.test.spearman.res$metrics) <- substr(rownames(DANA.test.spearman.res$metrics), 6, 20)
  
  
  ## Display results
  par(mfrow=c(1,1))
  
  # Compute correlation of cc+ metric for different estimation methods
  compare.test.cc <- cbind(DANA.test.mb.bic.res$metrics$cc,
                           DANA.test.mb.aic.res$metrics$cc,
                           DANA.test.mb.cv.res$metrics$cc,
                           DANA.test.mb.av.res$metrics$cc,
                           DANA.test.glasso.ric.res$metrics$cc,
                           DANA.test.glasso.stars.res$metrics$cc,
                           DANA.test.huge.mb.ric.res$metrics$cc,
                           DANA.test.huge.mb.stars.res$metrics$cc,
                           DANA.test.fastggm.res$metrics$cc,
                           DANA.test.pearson.res$metrics$cc,
                           DANA.test.spearman.res$metrics$cc)
  rownames(compare.test.cc) <- rownames(DANA.test.mb.bic.res$metrics)
  colnames(compare.test.cc) <- c("mb.bic", "mb.aic", "mb.cv","mb.av", "glasso.ric", "glasso.stars", "huge.mb.ric", "huge.mb.stars", "fastggm", "pearson", "spearman")
  stargazer(compare.test.cc, type="text", summary=FALSE, digits=3, 
              title="Test data - cc+", align=TRUE)
  corrplot.mixed(cor(compare.test.cc),lower="circle",upper="number")
  
  
  ## Display result statistics plot
  # MB (bic)
  p.test.mb.bic.stats <- plot.DANA.metrics(DANA.test.mb.bic.res$metrics, label.repel=TRUE) + ggtitle("MB (BIC)")
  print(p.test.mb.bic.stats)
  # MB (aic)
  p.test.mb.aic.stats <- plot.DANA.metrics(DANA.test.mb.aic.res$metrics, label.repel=TRUE) + ggtitle("MB (AIC)")
  print(p.test.mb.aic.stats)
  # MB (cv)
  p.test.mb.cv.stats <- plot.DANA.metrics(DANA.test.mb.cv.res$metrics, label.repel=TRUE) + ggtitle("MB (CV)")
  print(p.test.mb.cv.stats)
  # MB (av)
  p.test.mb.av.stats <- plot.DANA.metrics(DANA.test.mb.av.res$metrics, label.repel=TRUE) + ggtitle("MB (AV)")
  print(p.test.mb.av.stats)
  # glasso (ric)
  p.test.glasso.ric.stats <- plot.DANA.metrics(DANA.test.glasso.ric.res$metrics, label.repel=TRUE) + ggtitle("glasso (RIC)")
  print(p.test.glasso.ric.stats)
  # glasso (stars)
  p.test.glasso.stars.stats <- plot.DANA.metrics(DANA.test.glasso.stars.res$metrics, label.repel=TRUE) + ggtitle("glasso (StARS)")
  print(p.test.glasso.stars.stats)
  # huge.mb (ric)
  p.test.huge.mb.ric.stats <- plot.DANA.metrics(DANA.test.huge.mb.ric.res$metrics, label.repel=TRUE) + ggtitle("MB (RIC)")
  print(p.test.huge.mb.ric.stats)
  # huge.mb (stars)
  p.test.huge.mb.stars.stats <- plot.DANA.metrics(DANA.test.huge.mb.stars.res$metrics, label.repel=TRUE) + ggtitle("MB (StARS)")
  print(p.test.huge.mb.stars.stats)
  # FastGGM
  p.test.fastggm.stats <- plot.DANA.metrics(DANA.test.fastggm.res$metrics, label.repel=TRUE) + ggtitle("FastGGM")
  print(p.test.fastggm.stats)
  # Pearson
  p.test.pearson.stats <- plot.DANA.metrics(DANA.test.pearson.res$metrics, label.repel=TRUE) + ggtitle("Pearson")
  print(p.test.pearson.stats)
  # Spearman
  p.test.spearman.stats <- plot.DANA.metrics(DANA.test.spearman.res$metrics, label.repel=TRUE) + ggtitle("Spearman")
  print(p.test.spearman.stats)

}
## converting counts to integer mode
## 313 genes has been filtered because they contains too small number of reads across the experiments.Number of positive control markers with RC in [128, inf): 115
## Number of negative control markers with RC in [2, 10]: 102
## Estimating Models using method: mb.bic
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: mb.aic
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: mb.av
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: mb.cv
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: glasso(ric)
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: glasso(stars)
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: huge.mb.ric
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: huge.mb.stars
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: fastggm
## Estimating correlations for pos. and neg. controls for data set test...Conducting fastggm precision estimation for test....done
## done
## Estimating correlations for pos. and neg. controls for data set test.TMM...Conducting fastggm precision estimation for test.TMM....done
## done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...Conducting fastggm precision estimation for test.DESeq....done
## done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...Conducting fastggm precision estimation for test.PoissonSeq....done
## done
## Estimating correlations for pos. and neg. controls for data set test.TC...Conducting fastggm precision estimation for test.TC....done
## done
## Estimating correlations for pos. and neg. controls for data set test.Med...Conducting fastggm precision estimation for test.Med....done
## done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...Conducting fastggm precision estimation for test.RUVg....done
## done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...Conducting fastggm precision estimation for test.RUVs....done
## done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...Conducting fastggm precision estimation for test.RUVr....done
## done
## Estimating correlations for pos. and neg. controls for data set test.UQ...Conducting fastggm precision estimation for test.UQ....done
## done
## Estimating correlations for pos. and neg. controls for data set test.QN...Conducting fastggm precision estimation for test.QN....done
## done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: pearson
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## Estimating Models using method: spearman
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## 
## Test data - cc+
## ===============================================================================================================
##            mb.bic mb.aic mb.cv mb.av glasso.ric glasso.stars huge.mb.ric huge.mb.stars fastggm pearson spearman
## ---------------------------------------------------------------------------------------------------------------
##    TMM     0.955  0.975  0.976 0.983   0.948       0.911        0.959        0.979      0.977   0.875   0.704  
##   DESeq    0.952  0.973  0.963 0.975   0.939       0.899        0.956        0.971      0.973   0.859   0.682  
## PoissonSeq 0.960  0.966  0.977 0.982   0.939       0.910        0.939        0.977      0.967   0.800   0.646  
##     TC     0.961  0.968  0.973 0.978   0.948       0.898        0.944        0.974      0.977   0.931   0.831  
##    Med     0.682  0.701  0.714 0.753   0.794       0.782        0.590        0.703      0.777   0.702   0.852  
##    RUVg    0.960  0.970  0.953 0.965   0.928       0.896        0.929        0.969      0.971   0.842   0.586  
##    RUVs    0.920  0.916  0.908 0.898   0.862       0.796        0.853        0.910      0.878   0.660   0.514  
##    RUVr    0.812  0.824  0.861 0.867   0.678       0.528        0.667        0.784      0.920   0.536   0.436  
##     UQ     0.701  0.742  0.744 0.796   0.842       0.809        0.637        0.747      0.794   0.771   0.763  
##     QN     0.938  0.951  0.936 0.962   0.903       0.871        0.901        0.938      0.942   0.806   0.588  
## ---------------------------------------------------------------------------------------------------------------

DANA - Performance Analysis of Normalization Methods

We apply the full analysis pipeline to the data set. This includes:

  1. Apply the normalization methods to the raw counts.
  2. Define positive and negative controls based on read counts.
  3. Estimate the level of coexpression between miRNAs for each data set.
  4. Compare pos and neg controls before and after normalization.

The following parameters are used:

  • Case name: MSK
  • Control definition:
    • Definition of negative controls, read count in [2, 10]
    • Definition of positive controls, read count in [128, inf]
    • Distance threshold for clustering: 10^{4}
  • Data preprocessing transformation: log2
  • Coexpression estimation:
    • Correlation estimation method for positive controls: mb
    • Correlation estimation tuning parameter calibration method for positive controls: bic
  • Comparison statistics:
    • Generate plots? FALSE

Benchmark Data Set

if(settings$bench.DANA) {
  genes <- rownames(bench.RC)
  
  ## Apply Normalization
  data.norm <- normalize(X=bench.RC,
                         groups=groups,
                         name="benchmark",
                         apply.TC=config$norm.apply.TC,
                         apply.UQ=config$norm.apply.UQ,
                         apply.med=config$norm.apply.med,
                         apply.TMM=config$norm.apply.TMM,
                         apply.DESeq=config$norm.apply.DESeq,
                         apply.PoissonSeq=config$norm.apply.PoissonSeq,
                         apply.QN=config$norm.apply.QN,
                         apply.RUV=config$norm.apply.RUV)
  bench.norm <- data.norm
  
  ## Define Controls
  pre.res <- define.controls(bench.RC,
                             t.zero=config$t.zero,
                             t.poor=config$t.poor,
                             t.well=config$t.well,
                             t.cluster=config$cluster.threshold,
                             coords=coords,
                             clusters=NULL)
  pos.controls <- pre.res$genes.pos
  pos.controls.bench <- pre.res$genes.pos
  neg.controls <- pre.res$genes.neg
  neg.controls.bench <- pre.res$genes.neg
  clusters <- pre.res$clusters
  clusters.bench <- clusters
  
  # Apply DANA pipeline
  DANA.bench <- DANA(data.RC=bench.RC, 
                     data.norm=bench.norm, 
                     pos.controls=pos.controls.bench, 
                     neg.controls=neg.controls.bench, 
                     clusters=clusters.bench,
                     coords=coords,
                     case.name="benchmark",
                     generate.plots=config$generate.plots,
                     preprocess.transform=config$preprocess.transform,
                     corr.method.pos=config$corr.method.pos,
                     tuning.criterion.pos=config$tuning.criterion.pos,
                     corr.method.neg=config$corr.method.neg,
                     tuning.criterion.neg=config$tuning.criterion.neg) 
  
  if(!config$generate.plots) {
    stargazer(DANA.bench$metrics, type="text", summary=FALSE, digits=4, 
              title="Comparison statistics for the benchmark data set", align=TRUE)
  }
  iplot.bench <- iggplot.corr(DANA.bench$data.model$pos$corr, clusters=clusters.bench, title="Benchmark data set - Positive controls (un-normalized)", coords=coords)
  iplot.bench.TMM <- iggplot.corr(DANA.bench$norm.models$benchmark.TMM$pos$corr, clusters=clusters.bench, title="Benchmark data set - Positive controls (TMM)", coords=coords)
}
## converting counts to integer mode
## 212 genes has been filtered because they contains too small number of reads across the experiments.Number of positive control markers with RC in [128, inf): 179
## Number of negative control markers with RC in [2, 10]: 120
## Estimating correlations for pos. and neg. controls for data set benchmark...done
## Estimating correlations for pos. and neg. controls for data set benchmark.TMM...done
## Estimating correlations for pos. and neg. controls for data set benchmark.DESeq...done
## Estimating correlations for pos. and neg. controls for data set benchmark.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set benchmark.TC...done
## Estimating correlations for pos. and neg. controls for data set benchmark.Med...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVg...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVs...done
## Estimating correlations for pos. and neg. controls for data set benchmark.RUVr...done
## Estimating correlations for pos. and neg. controls for data set benchmark.UQ...done
## Estimating correlations for pos. and neg. controls for data set benchmark.QN...done
## Comparing models benchmark vs. benchmark.TMM....done
## Comparing models benchmark vs. benchmark.DESeq....done
## Comparing models benchmark vs. benchmark.PoissonSeq....done
## Comparing models benchmark vs. benchmark.TC....done
## Comparing models benchmark vs. benchmark.Med....done
## Comparing models benchmark vs. benchmark.RUVg....done
## Comparing models benchmark vs. benchmark.RUVs....done
## Comparing models benchmark vs. benchmark.RUVr....done
## Comparing models benchmark vs. benchmark.UQ....done
## Comparing models benchmark vs. benchmark.QN....done
## 
## Comparison statistics for the benchmark data set
## ==================================
##                        cc    mscr 
## ----------------------------------
##    benchmark.TMM     0.9606 0.6348
##   benchmark.DESeq    0.9605 0.6282
## benchmark.PoissonSeq 0.9627 0.6455
##     benchmark.TC     0.9553 0.5375
##    benchmark.Med     0.9036 0.5518
##    benchmark.RUVg    0.9797 0.5178
##    benchmark.RUVs    0.8399 0.5046
##    benchmark.RUVr    0.9373 0.6353
##     benchmark.UQ     0.9155 0.5691
##     benchmark.QN     0.9216 0.6296
## ----------------------------------

Test Data Set

if(settings$test.DANA) {
  genes <- rownames(test.RC)
  
  ## Apply Normalization
  data.norm <- normalize(test.RC,
                         groups=groups,
                         name="test",
                         apply.TC=config$norm.apply.TC,
                         apply.UQ=config$norm.apply.UQ,
                         apply.med=config$norm.apply.med,
                         apply.TMM=config$norm.apply.TMM,
                         apply.DESeq=config$norm.apply.DESeq,
                         apply.PoissonSeq=config$norm.apply.PoissonSeq,
                         apply.QN=config$norm.apply.QN,
                         apply.RUV=config$norm.apply.RUV)
  test.norm <- data.norm
  
  ## Define Controls
  pre.res <- define.controls(test.RC,
                             t.zero=config$t.zero,
                             t.poor=config$t.poor,
                             t.well=config$t.well,
                             t.cluster=config$cluster.threshold,
                             coords=coords,
                             clusters=NULL)
  pos.controls <- pre.res$genes.pos
  pos.controls.test <- pre.res$genes.pos
  neg.controls <- pre.res$genes.neg
  neg.controls.test <- pre.res$genes.neg
  clusters <- pre.res$clusters
  clusters.test <- clusters
  
  # Apply DANA pipeline
  DANA.test <- DANA(data.RC=test.RC, 
                    data.norm=test.norm, 
                    pos.controls=pos.controls.test, 
                    neg.controls=neg.controls.test, 
                    clusters=clusters.test,
                    coords=coords,
                    case.name="test",
                    generate.plots=config$generate.plots,
                    preprocess.transform=config$preprocess.transform,
                    corr.method.pos=config$corr.method.pos,
                    tuning.criterion.pos=config$tuning.criterion.pos,
                    corr.method.neg=config$corr.method.neg,
                    tuning.criterion.neg=config$tuning.criterion.neg) 
  
  if(!config$generate.plots) {
    stargazer(DANA.test$metrics, type="text", summary=FALSE, digits=4, 
              title="Comparison metrics for the test data set", align=TRUE)
  }
  iplot.test <- iggplot.corr(DANA.test$data.model$pos$corr, clusters=clusters.test, title="Test data set - Positive controls (un-normalized)", coords=coords)
  iplot.test.TMM <- iggplot.corr(DANA.test$norm.models$test.TMM$pos$corr, clusters=clusters.test, title="Test data set - Positive controls (TMM)", coords=coords)
}
## converting counts to integer mode
## 313 genes has been filtered because they contains too small number of reads across the experiments.Number of positive control markers with RC in [128, inf): 115
## Number of negative control markers with RC in [2, 10]: 102
## Estimating correlations for pos. and neg. controls for data set test...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models test vs. test.TMM....done
## Comparing models test vs. test.DESeq....done
## Comparing models test vs. test.PoissonSeq....done
## Comparing models test vs. test.TC....done
## Comparing models test vs. test.Med....done
## Comparing models test vs. test.RUVg....done
## Comparing models test vs. test.RUVs....done
## Comparing models test vs. test.RUVr....done
## Comparing models test vs. test.UQ....done
## Comparing models test vs. test.QN....done
## 
## Comparison metrics for the test data set
## =============================
##                   cc    mscr 
## -----------------------------
##    test.TMM     0.9551 0.4326
##   test.DESeq    0.9518 0.4442
## test.PoissonSeq 0.9604 0.3410
##     test.TC     0.9605 0.4368
##    test.Med     0.6819 0.3410
##    test.RUVg    0.9597 0.4787
##    test.RUVs    0.9203 0.3423
##    test.RUVr    0.8117 0.7253
##     test.UQ     0.7010 0.3435
##     test.QN     0.9377 0.2591
## -----------------------------

Investigate Co-expression Structures

We compare the estimated co-expression structures for the test and benchmark dataset. Here, we take a look at the models for the un-normalized data as well as TMM-normalized data. Note that you cannot directly compare the graphs of benchmark with test data since the considered set of genes is not identical.

if(settings$investigate.coexpression && settings$bench.DANA && settings$test.DANA) {
  htmltools::tagList(list(iplot.bench, iplot.bench.TMM, iplot.test, iplot.test.TMM))  # show plots in markdown
}

Plot co-expressions using interactive plots

We compare the estimated co-expression structures for the test and benchmark dataset. Here, we take a look at the models for the un-normalized data as well as TMM-normalized data. Note that you cannot directly compare the graphs of benchmark with test data since the considered set of genes is not identical.

if(settings$investigate.coexpression && settings$bench.DANA && settings$test.DANA) {
  # Generate additional results for pearson correlation
  DANA.test.pearson.res <- DANA(data.RC=test.RC, 
                                data.norm=test.norm, 
                                pos.controls=pos.controls.test, 
                                neg.controls=neg.controls.test, 
                                clusters=clusters.test,
                                coords=coords,
                                case.name="test",
                                generate.plots=F,
                                preprocess.transform="log2",
                                corr.method.pos="pearson",
                                tuning.criterion.pos="",
                                corr.method.neg="pearson",
                                tuning.criterion.neg="") 
  
  iplot.pearson.bench <- iggplot.corr(DANA.test.pearson.res$data.model$pos$corr, clusters=clusters.bench, title="Benchmark data set - Positive controls (un-normalized) - Pearson Correlation", coords=coords)
  iplot.pearson.bench.TMM <- iggplot.corr(DANA.test.pearson.res$norm.models$test.TMM$pos$corr, clusters=clusters.bench, title="Benchmark data set - Positive controls (TMM)", coords=coords)
  iplot.bench <- iggplot.corr(DANA.bench$data.model$pos$corr, clusters=clusters.bench, title="Benchmark data set - Positive controls (un-normalized)", coords=coords)
  iplot.bench.TMM <- iggplot.corr(DANA.bench$norm.models$benchmark.TMM$pos$corr, clusters=clusters.bench, title="Benchmark data set - Positive controls (TMM)", coords=coords)
  iplot.test <- iggplot.corr(DANA.test$data.model$pos$corr, clusters=clusters.test, title="Test data set - Positive controls (un-normalized)", coords=coords)
  iplot.test.TMM <- iggplot.corr(DANA.test$norm.models$test.TMM$pos$corr, clusters=clusters.test, title="Test data set - Positive controls (TMM)", coords=coords)
  
  
  iplot.pearson.bench.neg <- iggplot.corr(DANA.test.pearson.res$data.model$neg$corr, clusters=clusters.bench, title="Benchmark data set - Negative controls (un-normalized) - Pearson Correlation", coords=coords)
  iplot.pearson.bench.TMM.neg <- iggplot.corr(DANA.test.pearson.res$norm.models$test.TMM$neg$corr, clusters=clusters.test, title="Benchmark data set - Negative controls (TMM)", coords=coords)
  iplot.bench.neg <- iggplot.corr(DANA.bench$data.model$neg$corr, clusters=clusters.bench, title="Benchmark data set - Negative controls (un-normalized)", coords=coords)
  iplot.bench.TMM.neg <- iggplot.corr(DANA.bench$norm.models$benchmark.TMM$neg$corr, clusters=clusters.bench, title="Benchmark data set - Negative controls (TMM)", coords=coords)
  iplot.test.neg <- iggplot.corr(DANA.test$data.model$neg$corr, clusters=clusters.test, title="Test data set - Negative controls (un-normalized)", coords=coords)
  iplot.test.TMM.neg <- iggplot.corr(DANA.test$norm.models$test.TMM$neg$corr, clusters=clusters.test, title="Test data set - Negative controls (TMM)", coords=coords)
  
  htmltools::tagList(list(iplot.pearson.bench.neg, iplot.pearson.bench.TMM.neg, iplot.bench.neg, iplot.bench.TMM.neg, iplot.test.neg, iplot.test.TMM.neg))  # show plots in markdown
  htmltools::tagList(list(iplot.pearson.bench.neg, iplot.pearson.bench.TMM.neg, iplot.bench.neg, iplot.bench.TMM.neg, iplot.test.neg, iplot.test.TMM.neg))  # show plots in markdown
}

Comparison of two Co-expression Estimation Methods

In this section, we compare two methods for estimating the co-expression of miRNAs to each other. The methods can be chosen individually to positive and negative controls and include all available marginal and partial correlation methods.

Computation

if(settings$compare.two.methods) {
  ## Setup
  show.plots <- TRUE 
  # Method for estimating partial correlations
  method1.name <- "Partial Correlations (Meinshausen-Buehlmann, bic)"
  method1.pos <- "mb"
  tuning.method1.pos <- "bic"  
  method1.neg <- "mb"
  tuning.method1.neg <- "bic"
  # Method for estimating marginal correlations
  method2.name <- "Marginal Correlations (Pearson)"
  method2.pos <- "mb"
  tuning.method2.pos <- "bic"  
  method2.neg <- "pearson"
  tuning.method2.neg <- ""
  
  ## Apply Normalization
  data.norm <- normalize(test.RC,
                         groups=groups,
                         name="test",
                         apply.TC=config$norm.apply.TC,
                         apply.UQ=config$norm.apply.UQ,
                         apply.med=config$norm.apply.med,
                         apply.TMM=config$norm.apply.TMM,
                         apply.DESeq=config$norm.apply.DESeq,
                         apply.PoissonSeq=config$norm.apply.PoissonSeq,
                         apply.QN=config$norm.apply.QN,
                         apply.RUV=config$norm.apply.RUV)
  
  
  ## Define Controls
  # Define Polycistronic Clusters and Controls
  pre.res <- define.controls(test.RC,
                             t.zero=config$t.zero,
                             t.poor=config$t.poor,
                             t.well=config$t.well,
                             t.cluster=config$cluster.threshold,
                             coords=coords,
                             clusters=NULL)
  pos.controls <- pre.res$genes.pos
  neg.controls <- pre.res$genes.neg
  clusters <- pre.res$clusters
  rm(pre.res)
  
  
  
  
  ## Apply DANA pipeline
  # Method 1
  DANA.method1 <- DANA(data.RC=test.RC, 
                       data.norm=data.norm, 
                       pos.controls=pos.controls, 
                       neg.controls=neg.controls, 
                       clusters=clusters,
                       coords=coords,
                       case.name="test",
                       generate.plots=show.plots,
                       preprocess.transform=config$preprocess.transform,
                       corr.method.pos=method1.pos,
                       tuning.criterion.pos=tuning.method1.pos,
                       corr.method.neg=method1.neg,
                       tuning.criterion.neg=tuning.method1.neg) 
  
  # Method 2
  DANA.method2 <- DANA(data.RC=test.RC, 
                       data.norm=data.norm, 
                       pos.controls=pos.controls, 
                       neg.controls=neg.controls, 
                       clusters=clusters,
                       coords=coords,
                       case.name="test",
                       generate.plots=show.plots,
                       preprocess.transform=config$preprocess.transform,
                       corr.method.pos=method2.pos,
                       tuning.criterion.pos=tuning.method2.pos,
                       corr.method.neg=method2.neg,
                       tuning.criterion.neg=tuning.method2.neg) 
  
  if(!show.plots) {
    stargazer(DANA.method1$metrics, type="text", summary=FALSE, digits=4, 
              title="Comparison statistics for the test data set", align=TRUE)
    stargazer(DANA.method2$metrics, type="text", summary=FALSE, digits=4, 
              title="Comparison statistics for the test data set", align=TRUE)
  }
  
}

Result Plots

if(settings$compare.two.methods) {
  p.res.stats.method1 <- plot.DANA.metrics(DANA.method1$metrics, label.repel=TRUE) + ggtitle(method1.name)
  plot(p.res.stats.method1)
  
  p.res.stats.method2 <- plot.DANA.metrics(DANA.method2$metrics, label.repel=TRUE) + ggtitle(method2.name)
  plot(p.res.stats.method2)
}

Comparison of DANA results for different choices of control cutoffs

We compare the results of the DANA appraoch for different choices of the control cutoffs. We test three different configurations and plot the resulting DANA metrics for preservation of biological effects and removal of handling effects. We observe only small differences/variation of the results even though the controls are chosen considerably different.

if(settings$compare.cutoffs) {
  ## Configuration of three possible cutoff choices
  cutoff.conf1 <- list(
    t.zero = 2,  # zero-expressed in [0, t.zero)
    t.poor = 10,  # poorly-expressed in [t.zero, t.poor]
    t.well = 2^7  # well-expressed in [t.well, inf)
  )
  cutoff.conf2 <- list(
    t.zero = 1,  # zero-expressed in [0, t.zero)
    t.poor = 4,  # poorly-expressed in [t.zero, t.poor]
    t.well = 2^6  # well-expressed in [t.well, inf)
  )
  cutoff.conf3 <- list(
    t.zero = 3,  # zero-expressed in [0, t.zero)
    t.poor = 16,  # poorly-expressed in [t.zero, t.poor]
    t.well = 2^8  # well-expressed in [t.well, inf)
  )
  
  p.meanvar.conf1 <- plot.mean.sd(test.RC, cutoff.conf1$t.zero, 
                                  cutoff.conf1$t.poor, cutoff.conf1$t.well)
  print(p.meanvar.conf1)
  p.RC.conf1 <- plot.countHist(test.RC, binwidth = 0.1, cutoff.conf1$t.zero, 
                               cutoff.conf1$t.poor, cutoff.conf1$t.well)
  print(p.RC.conf1)
  
  p.meanvar.conf2 <- plot.mean.sd(test.RC, cutoff.conf2$t.zero, 
                                  cutoff.conf2$t.poor, cutoff.conf2$t.well)
  print(p.meanvar.conf2)
  p.RC.conf2 <- plot.countHist(test.RC, binwidth = 0.1, cutoff.conf2$t.zero, 
                               cutoff.conf2$t.poor, cutoff.conf2$t.well)
  print(p.RC.conf2)
  
  p.meanvar.conf3 <- plot.mean.sd(test.RC, cutoff.conf3$t.zero, 
                                  cutoff.conf3$t.poor, cutoff.conf3$t.well)
  print(p.meanvar.conf3)
  p.RC.conf3 <- plot.countHist(test.RC, binwidth = 0.1, cutoff.conf3$t.zero, 
                               cutoff.conf3$t.poor, cutoff.conf3$t.well)
  print(p.RC.conf3)
  
  
  ## Apply Normalization
  test.norm <- normalize(test.RC,
                         groups=groups,
                         name="test",
                         apply.TC=config$norm.apply.TC,
                         apply.UQ=config$norm.apply.UQ,
                         apply.med=config$norm.apply.med,
                         apply.TMM=config$norm.apply.TMM,
                         apply.DESeq=config$norm.apply.DESeq,
                         apply.PoissonSeq=config$norm.apply.PoissonSeq,
                         apply.QN=config$norm.apply.QN,
                         apply.RUV=config$norm.apply.RUV)
  
  ## Define Controls
  
  # config 1
  cutoffs.1 <- define.controls(test.RC,
                               t.zero=cutoff.conf1$t.zero,
                               t.poor=cutoff.conf1$t.poor,
                               t.well=cutoff.conf1$t.well,
                               t.cluster=config$cluster.threshold,
                               coords=coords,
                               clusters=NULL)
  pos.controls.1 <- cutoffs.1$genes.pos
  neg.controls.1 <- cutoffs.1$genes.neg
  clusters <- cutoffs.1$clusters
  
  # config 2
  cutoffs.2 <- define.controls(test.RC,
                               t.zero=cutoff.conf2$t.zero,
                               t.poor=cutoff.conf2$t.poor,
                               t.well=cutoff.conf2$t.well,
                               t.cluster=config$cluster.threshold,
                               coords=coords,
                               clusters=NULL)
  pos.controls.2 <- cutoffs.2$genes.pos
  neg.controls.2 <- cutoffs.2$genes.neg
  
  
  # config 3
  cutoffs.3 <- define.controls(test.RC,
                               t.zero=cutoff.conf3$t.zero,
                               t.poor=cutoff.conf3$t.poor,
                               t.well=cutoff.conf3$t.well,
                               t.cluster=config$cluster.threshold,
                               coords=coords,
                               clusters=NULL)
  pos.controls.3 <- cutoffs.3$genes.pos
  neg.controls.3 <- cutoffs.3$genes.neg
  
  
  
  ## Apply DANA pipeline
  
  # Config 1
  DANA.1 <- DANA(data.RC=test.RC, 
                 data.norm=test.norm, 
                 pos.controls=pos.controls.1, 
                 neg.controls=neg.controls.1, 
                 clusters=clusters,
                 coords=coords,
                 case.name="config1",
                 generate.plots=FALSE,
                 preprocess.transform=config$preprocess.transform,
                 corr.method.pos=config$corr.method.pos,
                 tuning.criterion.pos=config$tuning.criterion.pos,
                 corr.method.neg=config$corr.method.neg,
                 tuning.criterion.neg=config$tuning.criterion.neg) 
  rownames(DANA.1$metrics) <- substr(rownames(DANA.1$metrics), 6, 20)
  
  # Config 2
  DANA.2 <- DANA(data.RC=test.RC, 
                 data.norm=test.norm, 
                 pos.controls=pos.controls.2, 
                 neg.controls=neg.controls.2, 
                 clusters=clusters,
                 coords=coords,
                 case.name="config2",
                 generate.plots=FALSE,
                 preprocess.transform=config$preprocess.transform,
                 corr.method.pos=config$corr.method.pos,
                 tuning.criterion.pos=config$tuning.criterion.pos,
                 corr.method.neg=config$corr.method.neg,
                 tuning.criterion.neg=config$tuning.criterion.neg) 
  rownames(DANA.2$metrics) <- substr(rownames(DANA.2$metrics), 6, 20)
  
  # Config 3
  DANA.3 <- DANA(data.RC=test.RC, 
                 data.norm=test.norm, 
                 pos.controls=pos.controls.3, 
                 neg.controls=neg.controls.3, 
                 clusters=clusters,
                 coords=coords,
                 case.name="config3",
                 generate.plots=FALSE,
                 preprocess.transform=config$preprocess.transform,
                 corr.method.pos=config$corr.method.pos,
                 tuning.criterion.pos=config$tuning.criterion.pos,
                 corr.method.neg=config$corr.method.neg,
                 tuning.criterion.neg=config$tuning.criterion.neg) 
  rownames(DANA.3$metrics) <- substr(rownames(DANA.3$metrics), 6, 20)
  
  # Plot results

  p.result.DANA.1 <- plot.DANA.metrics(DANA.1$metrics, label.repel = TRUE)
  plot(p.result.DANA.1 + ggtitle("Configuration 1"))
  p.result.DANA.2 <- plot.DANA.metrics(DANA.2$metrics, label.repel = TRUE)
  plot(p.result.DANA.2 + ggtitle("Configuration 2"))
  p.result.DANA.3 <- plot.DANA.metrics(DANA.3$metrics, label.repel = TRUE)
  plot(p.result.DANA.3 + ggtitle("Configuration 3"))
}

## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).
## converting counts to integer mode

## 313 genes has been filtered because they contains too small number of reads across the experiments.Number of positive control markers with RC in [128, inf): 115
## Number of negative control markers with RC in [2, 10]: 102
## Number of positive control markers with RC in [64, inf): 150
## Number of negative control markers with RC in [1, 4]: 110
## Number of positive control markers with RC in [256, inf): 77
## Number of negative control markers with RC in [3, 16]: 103
## Estimating correlations for pos. and neg. controls for data set config1...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models config1 vs. test.TMM....done
## Comparing models config1 vs. test.DESeq....done
## Comparing models config1 vs. test.PoissonSeq....done
## Comparing models config1 vs. test.TC....done
## Comparing models config1 vs. test.Med....done
## Comparing models config1 vs. test.RUVg....done
## Comparing models config1 vs. test.RUVs....done
## Comparing models config1 vs. test.RUVr....done
## Comparing models config1 vs. test.UQ....done
## Comparing models config1 vs. test.QN....done
## Estimating correlations for pos. and neg. controls for data set config2...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models config2 vs. test.TMM....done
## Comparing models config2 vs. test.DESeq....done
## Comparing models config2 vs. test.PoissonSeq....done
## Comparing models config2 vs. test.TC....done
## Comparing models config2 vs. test.Med....done
## Comparing models config2 vs. test.RUVg....done
## Comparing models config2 vs. test.RUVs....done
## Comparing models config2 vs. test.RUVr....done
## Comparing models config2 vs. test.UQ....done
## Comparing models config2 vs. test.QN....done
## Estimating correlations for pos. and neg. controls for data set config3...done
## Estimating correlations for pos. and neg. controls for data set test.TMM...done
## Estimating correlations for pos. and neg. controls for data set test.DESeq...done
## Estimating correlations for pos. and neg. controls for data set test.PoissonSeq...done
## Estimating correlations for pos. and neg. controls for data set test.TC...done
## Estimating correlations for pos. and neg. controls for data set test.Med...done
## Estimating correlations for pos. and neg. controls for data set test.RUVg...done
## Estimating correlations for pos. and neg. controls for data set test.RUVs...done
## Estimating correlations for pos. and neg. controls for data set test.RUVr...done
## Estimating correlations for pos. and neg. controls for data set test.UQ...done
## Estimating correlations for pos. and neg. controls for data set test.QN...done
## Comparing models config3 vs. test.TMM....done
## Comparing models config3 vs. test.DESeq....done
## Comparing models config3 vs. test.PoissonSeq....done
## Comparing models config3 vs. test.TC....done
## Comparing models config3 vs. test.Med....done
## Comparing models config3 vs. test.RUVg....done
## Comparing models config3 vs. test.RUVs....done
## Comparing models config3 vs. test.RUVr....done
## Comparing models config3 vs. test.UQ....done
## Comparing models config3 vs. test.QN....done

Differential Expression Analysis

Normalize Data

if(settings$perform.DEA) {
  ## Reset data
  test.RC <- test 
  bench.RC <- benchmark  
  
  ## Normalize test Data
  test.norm <- normalize(test.RC,
                         groups=groups,
                         name="test",
                         apply.TC=config.DEA$norm.apply.TC,
                         apply.UQ=config.DEA$norm.apply.UQ,
                         apply.med=config.DEA$norm.apply.med,
                         apply.TMM=config.DEA$norm.apply.TMM,
                         apply.DESeq=config.DEA$norm.apply.DESeq,
                         apply.PoissonSeq=config.DEA$norm.apply.PoissonSeq,
                         apply.QN=config.DEA$norm.apply.QN,
                         apply.RUV=FALSE)
  test.norm.RUV.adjust <- list(test.RUVr=norm.RUV(test.RC, groups, "RUVr")$adjust.factor,
                               test.RUVs=norm.RUV(test.RC, groups, "RUVs")$adjust.factor,
                               test.RUVg=norm.RUV(test.RC, groups, "RUVg")$adjust.factor)
}
## converting counts to integer mode
## 328 genes has been filtered because they contains too small number of reads across the experiments.

Benchmark DEA

if(settings$perform.DEA) {
  ## Perform DEA on benchmark dataset
  bench.DEA <- DE.voom(data.RC=bench.RC, groups=groups, plot=config.DEA$generate.meanvar.plot)
  if(config.DEA$generate.volcano.plot) plot.DE.volcano(bench.DEA, alpha=config.DEA$alpha, q.values=config.DEA$q.values)
}

## number of DE genes: 59

Test DEA

if(settings$perform.DEA) {
  ## Perform DEA on full dataset
  test.DEA <- DE.voom(data.RC=test.RC, groups=groups, plot=config.DEA$generate.meanvar.plot, plot.title="test (no norm)")
  if(config.DEA$generate.volcano.plot) plot.DE.volcano(test.DEA, alpha=config.DEA$alpha, q.values=config.DEA$q.values, title="test (no norm)")
  
  ## DEA for normalized test data
  test.norm.DEA <- list()
  for(i in 1:length(test.norm)) {
    test.norm.DEA <- 
      append(test.norm.DEA, list(DE.voom(data.RC=test.norm[[i]], 
                                         groups=groups, 
                                         plot=config.DEA$generate.meanvar.plot,
                                         plot.title=names(test.norm)[i])))
    if(config.DEA$generate.volcano.plot) {
      plot.DE.volcano(test.norm.DEA[[i]], 
                      alpha=config.DEA$alpha, 
                      q.values=config.DEA$q.values, 
                      title=names(test.norm)[i])
    }
  }
  ## DEA for RUV normalization
  for(i in 1:length(test.norm.RUV.adjust)) {
    test.norm.DEA <- 
      append(test.norm.DEA, list(DE.voom(data.RC=test.RC, 
                                         groups=groups, 
                                         plot=config.DEA$generate.meanvar.plot,
                                         plot.title=names(test.norm.RUV.adjust)[i],
                                         adjust=test.norm.RUV.adjust[[i]])))
    if(config.DEA$generate.volcano.plot) {
      plot.DE.volcano(test.norm.DEA[[i]], 
                      alpha=config.DEA$alpha, 
                      q.values=config.DEA$q.values, 
                      title=names(test.norm.RUV.adjust)[i])
    }
  }
  names(test.norm.DEA) <- c(names(test.norm), names(test.norm.RUV.adjust))
}

## number of DE genes: 70

## number of DE genes: 39

## number of DE genes: 52

## number of DE genes: 39

## number of DE genes: 49

## number of DE genes: 54

## number of DE genes: 51

## number of DE genes: 51

## number of DE genes: 39

## number of DE genes: 52

## number of DE genes: 39

Results

if(settings$perform.DEA) {
  ## Compare test (non-norm) to benchmark
  test.norm.DEA.stats <- list(test=compare.DEAs(DEA=test.DEA,
                                           truth.DEA=bench.DEA,
                                           alpha=config.DEA$alpha))
  ## Compare normalized test data to benchmark
  for(i in 1:length(test.norm.DEA)) {
    test.norm.DEA.stats <- 
      append(test.norm.DEA.stats, 
             list(compare.DEAs(DEA=test.norm.DEA[[i]],
                               truth.DEA=bench.DEA,
                               alpha=config.DEA$alpha,
                               subset=switch(config.DEA$perform.subsetting+1,
                                             NULL,
                                             rownames(test.norm.DEA[[i]])))))
  }
  names(test.norm.DEA.stats) <- c("test.No Norm", names(test.norm.DEA))
  test.norm.DEA.stats <- test.norm.DEA.stats[1:length(test.norm.DEA.stats)]
  
  
  ## Visualize results
  
  # FNR-FDR Plot
  test.DEA.res <- data.frame(method=substr(names(test.norm.DEA.stats), 6, 20),
                             FDR=-sapply(test.norm.DEA.stats, function(x) x$FDR)+1,
                             FNR=-sapply(test.norm.DEA.stats, function(x) x$FNR)+1)
  p.test.DEA.FNR.FDR <- ggplot(test.DEA.res, aes(x=FNR, y=FDR, label=method)) +
      theme_bw() + 
      geom_point(alpha=1) + 
      xlab("Sensitivity") + # True positive rate(1-FNR)
      ylab("Precision") + # Positive predictive value (1-FDR)
      geom_text_repel(aes(label = method), size=3) +
      scale_x_continuous(labels = scales::percent, limits=c(0,1),
                         breaks = scales::pretty_breaks(n = 4)) +
      scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.test.DEA.FNR.FDR)
  
  # TPR-FPR Plot
  test.DEA.res <- data.frame(method=substr(names(test.norm.DEA.stats), 6, 20),
                             TPR=sapply(test.norm.DEA.stats, function(x) x$TPR),
                             FPR=-sapply(test.norm.DEA.stats, function(x) x$FPR)+1)
  p.test.DEA.TPR.FPR <- ggplot(test.DEA.res, aes(x=TPR, y=FPR, label=method)) +
      theme_bw() + 
      geom_point(alpha=1) + 
      xlab("Sensitivity") + 
      ylab("Specificity") + 
      geom_text_repel(aes(label = method), size=3) +
      scale_x_continuous(labels = scales::percent, limits=c(0,1),
                         breaks = scales::pretty_breaks(n = 4)) +
      scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.test.DEA.TPR.FPR)
  
  # CAT Plot
  d <- append(list("test.No Norm"=test.DEA), test.norm.DEA)
  names(d) <- substr(names(d), 6, 20)
  p.test.DEA.CAT <- plot.CAT(DEA=d, 
                             truth.DEA=bench.DEA, 
                             title="Significance ranks of miRNAs",
                             maxrank=100)
  print(p.test.DEA.CAT)
  rm(d)
}

Results and Figures for Paper

Number of samples, markers, and positive and negative controls

if(settings$generate.paper.figs) {
  
# Dimension of data after analysis
cat("Test data:\n")
cat("  - Dimensions: p=", dim(test.RC)[1],", n=", dim(test.RC)[2], "\n", sep="")
cat("  - Positive controls:\n")
cat("    * Definition mean RC in interval: [", config$t.well, ", inf ]\n", sep="")
cat("    * Number of positive controls miRNAs:", length(pos.controls.test), "\n")
cat("  - Negative controls:\n")
cat("    * Definition mean RC in interval: [", config$t.zero, ",", config$t.poor, "]\n")
cat("    * Number of negative controls miRNAs:", length(neg.controls.test), "\n")


cat("\n\nBenchmark data:\n")
cat("  - Dimensions: p=", dim(bench.RC)[1],", n=", dim(bench.RC)[2], "\n", sep="")
cat("  - Positive controls:\n")
cat("    * Definition mean RC in interval: [", config$t.well, ", inf ]\n", sep="")
cat("    * Number of positive controls miRNAs:", length(pos.controls.bench), "\n")
cat("  - Negative controls:\n")
cat("    * Definition mean RC in interval: [", config$t.zero, ",", config$t.poor, "]\n")
cat("    * Number of negative controls miRNAs:", length(neg.controls.bench), "\n")

}
## Test data:
##   - Dimensions: p=1033, n=54
##   - Positive controls:
##     * Definition mean RC in interval: [128, inf ]
##     * Number of positive controls miRNAs: 115 
##   - Negative controls:
##     * Definition mean RC in interval: [ 2 , 10 ]
##     * Number of negative controls miRNAs: 102 
## 
## 
## Benchmark data:
##   - Dimensions: p=1033, n=54
##   - Positive controls:
##     * Definition mean RC in interval: [128, inf ]
##     * Number of positive controls miRNAs: 179 
##   - Negative controls:
##     * Definition mean RC in interval: [ 2 , 10 ]
##     * Number of negative controls miRNAs: 120

Read Count Distribution in the Data

Per Data-Set

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # Histogram plot test data set
  test.RC.log2 <- log2(rowMeans(test.RC)+1)
  df <- data.frame(log.expression=test.RC.log2)
  p.test.RC.hist <- ggplot(df, aes(x=log.expression)) + 
    geom_histogram(binwidth = .1, color="black", fill="black") +
    geom_vline(xintercept = log2(config$t.zero+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="9e086c",  linetype="longdash") +
    ggtitle("Test data set") +
    theme_classic()
  print(p.test.RC.hist)
  rm(df)

  
  # Histogram plot benchmark data set
  bench.RC.log2 <- log2(rowMeans(bench.RC)+1)
  df <- data.frame(log.expression=bench.RC.log2)
  p.bench.RC.hist <- ggplot(df, aes(x=log.expression)) + 
    geom_histogram(binwidth = .1, color="black", fill="black") +
    geom_vline(xintercept = log2(config$t.zero+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#3051ab", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="#9e086c",  linetype="longdash") +
    ggtitle("Benchmark data set") +
    theme_classic()
  print(p.bench.RC.hist)
  rm(df)
  
  
  # Combined read count histogram for test and benchmark data
  df <- data.frame(bench=bench.RC.log2, test=test.RC.log2)
  p.RC.hist <- ggplot(df) + 
    theme_classic() +
    geom_histogram(aes(x=test, y=..count..,fill="#69b3a2"), binwidth = .1) +
    geom_histogram(aes(x=bench, y=-..count.., fill="#404080"), binwidth = .1) +
    geom_vline(xintercept = log2(config$t.zero+1), color="#5851b8", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.poor+1), color="#5851b8", linetype="twodash") +
    geom_vline(xintercept = log2(config$t.well+1), color="#E7298A",  linetype="longdash") +
    geom_segment(aes(x = log2(config$t.zero+1), y = 75, xend = log2(config$t.poor+1), yend = 75),
                 arrow=arrow(length=unit(.07, "in"), ends="both"),
                 color="#5851b8") +
    annotate(geom="label", x=(log2(config$t.zero+1)+log2(config$t.poor+1))/2.0, y=84,
             label="neg. contr.", color="#5851b8", size=4, fill = alpha(c("white"),0.85), label.size = NA) +
    geom_segment(aes(x = log2(config$t.well+1), y = 75, xend = 10, yend = 75),
                 arrow=arrow(length=unit(.07, "in"), ends="last"),
                 color="#E7298A") +
    annotate(geom="label", x=log2(config$t.well+1)+.5, y=84,
             label="pos. contr.", color="#E7298A", size=4, hjust=0, fill = alpha(c("white"),0.85), label.size = NA) +
    # ylim(c(-90,90)) +
    scale_fill_manual(name="Data set",values=c("#404080", "#69b3a2"), labels=c("benchmark", "test"), guide = guide_legend(reverse=TRUE)) +
    theme(legend.position=c(0.86,0.86), 
          # legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.9))) +
    scale_y_continuous(breaks=c(-50,0,50), labels=c(50,0,50), limits=c(-95,95)) +
    xlab("Mean log2(read count)") +
    ylab("Frequency")
  print(p.RC.hist)
  rm(df)
  
    ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_RC_Distribution.pdf"), p.RC.hist, width = 5, height=4, device="pdf")
  }
  
}

Group Means comparison between test and benchmark

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  ## log2(mean(RC))
  
  # Histogram plot test data set
  test.RC.log2 <- log2(rowMeans(test.RC)+1)
  bench.RC.log2 <- log2(rowMeans(bench.RC)+1)
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.1 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("test") + 
    ylab("benchmark") + 
    ggtitle("log2(mean(read count) + 1)") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.1)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (PMFH only)
  test.RC.log2 <- log2(rowMeans(test.RC[, groups=="PMFH"])+1)
  bench.RC.log2 <- log2(rowMeans(bench.RC[, groups=="PMFH"])+1)
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.PMFH.1 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("test (PMFH)") + 
    ylab("benchmark (PMFH)") + 
    ggtitle(" PMFH  ---  log2(mean(read count) + 1)") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.PMFH.1)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (MXF only)
  test.RC.log2 <- log2(rowMeans(test.RC[, groups=="MXF"])+1)
  bench.RC.log2 <- log2(rowMeans(bench.RC[, groups=="MXF"])+1)
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.MXF.1 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("test (MXF)") + 
    ylab("benchmark (MXF)") + 
    ggtitle(" MXF  ---  log2(mean(read count) + 1)") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.MXF.1)
  rm(df,test.RC.log2, bench.RC.log2)
  
  
  
  ## log2(mean(RC))
  
  # Histogram plot test data set
  test.RC.log2 <- rowMeans(log2(test.RC+1))
  bench.RC.log2 <- rowMeans(log2(bench.RC+1))
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.2 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("test") + 
    ylab("benchmark") + 
    ggtitle("mean(log2(read count + 1))") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.2)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (PMFH only)
  test.RC.log2 <- rowMeans(log2(test.RC[, groups=="PMFH"]+1))
  bench.RC.log2 <- rowMeans(log2(bench.RC[, groups=="PMFH"]+1))
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.PMFH.2 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("test (PMFH)") + 
    ylab("benchmark (PMFH)") + 
    ggtitle(" PMFH  ---  mean(log2(read count + 1))") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.PMFH.2)
  rm(df,test.RC.log2, bench.RC.log2)
  
  # Histogram plot test data set (MXF only)
  test.RC.log2 <- rowMeans(log2(test.RC[, groups=="MXF"]+1))
  bench.RC.log2 <- rowMeans(log2(bench.RC[, groups=="MXF"]+1))
  df <- data.frame(test=test.RC.log2, benchmark=bench.RC.log2)
  p.RC.comparison.MXF.2 <- ggplot(df,aes(x=test,y=benchmark)) + 
    geom_point(alpha=.25) + 
    xlab("test (MXF)") + 
    ylab("benchmark (MXF)") + 
    ggtitle(" MXF  ---  mean(log2(read count + 1))") +
      geom_abline(intercept = 0, slope = 1, color="black", linetype="longdash", size=.5) +
    theme(aspect.ratio = 1) +
    theme_classic()
  print(p.RC.comparison.MXF.2)
  rm(df,test.RC.log2, bench.RC.log2)
  
    ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_RC_Comparison_all1.pdf"), p.RC.comparison.2, width = 5, height=5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_RC_Comparison_PMFH1.pdf"), p.RC.comparison.PMFH.2, width = 5, height=5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_RC_Comparison_MXF1.pdf"), p.RC.comparison.MXF.2, width = 5, height=5, device="pdf")
  }
  
}

Mean-Variance Plot

Distribution property among technical replicates

if(settings$generate.paper.figs) {
  # Function for mean-variance plot for a given data set
  mean.variance.plot <- function(data.RC, title, axis.limit=NULL) {
    df <- data.frame(data.mean=log10(rowMeans(data.RC) + 1),
                     data.var =log10(rowVars(data.RC) + 1))
    lin.fit <- lm(df$data.var ~ df$data.mean)$coefficients
    p <- ggplot(df,aes(x=data.mean,y=data.var)) + 
      geom_point(alpha=.25) + 
      xlab("log10(Mean)") + 
      ylab("log10(Variance)") + 
      geom_abline(intercept = lin.fit[1], slope = lin.fit[2], color="red", linetype="longdash", size=1) +
      geom_abline(intercept = 0, slope = 1, colour="blue") +
      annotate("text", alpha=1, x=4, y=0.5, hjust=0, vjust=0, size=3, colour="red",
               label=paste0("log10(Variance) = ", round(lin.fit[1],2), " + ", round(lin.fit[2],2), "*log10(Mean)")) + 
      xlim(0, ifelse(is.null(axis.limit), max(df), axis.limit)) +
      ylim(0, ifelse(is.null(axis.limit), max(df), axis.limit)) +
      ggtitle(title) +
      theme(aspect.ratio=1, plot.title = element_text(hjust = 0.5)) +
      theme_classic()
    return(p)
  }
  
  ## Test data
  # PMFH and MXF
  p.meanvar.test <- mean.variance.plot(test.RC, "Test Data", axis.limit=12.5)
  print(p.meanvar.test)
  # PMFH
  p.meanvar.test.PMFH <- mean.variance.plot(test.RC[, groups == "PMFH"], "Test Data - PMFH", axis.limit=12.5)
  print(p.meanvar.test.PMFH)
  # MXF
  p.meanvar.test.MXF <- mean.variance.plot(test.RC[, groups == "MXF"], "Test Data - MXF", axis.limit=12.5)
  print(p.meanvar.test.MXF)
  
  ## Benchmark data
  # PMFH and MXF
  p.meanvar.bench <- mean.variance.plot(bench.RC, "Benchmark Data", axis.limit=12.5)
  print(p.meanvar.bench)
  # PMFH
  p.meanvar.bench.PMFH <- mean.variance.plot(bench.RC[, groups == "PMFH"], "Benchmark Data - PMFH", axis.limit=12.5)
  print(p.meanvar.bench.PMFH)
  # MXF
  p.meanvar.bench.MXF <- mean.variance.plot(bench.RC[, groups == "MXF"], "Benchmark Data - MXF", axis.limit=12.5)
  print(p.meanvar.bench.MXF)
}

Marker-specific sd vs mean

if(settings$generate.paper.figs) {
  
  ## Test data
  # PMFH and MXF
  p.meanvar.test <- plot.mean.sd(test.RC, config$t.zero, config$t.poor, config$t.well)
  print(p.meanvar.test)
  # PMFH
  p.meanvar.test.PMFH <- plot.mean.sd(test.RC[, groups == "PMFH"], config$t.zero, config$t.poor, config$t.well, "Test Data - PMFH")
  print(p.meanvar.test.PMFH)
  # MXF
  p.meanvar.test.MXF <- plot.mean.sd(test.RC[, groups == "MXF"], config$t.zero, config$t.poor, config$t.well, "Test Data - MXF")
  print(p.meanvar.test.MXF)
  
  ## Benchmark data
  # PMFH and MXF
  p.meanvar.bench <- plot.mean.sd(bench.RC, config$t.zero, config$t.poor, config$t.well, "Benchmark Data")
  print(p.meanvar.bench)
  # PMFH
  p.meanvar.bench.PMFH <- plot.mean.sd(bench.RC[, groups == "PMFH"], config$t.zero, config$t.poor, config$t.well, "Benchmark Data - PMFH")
  print(p.meanvar.bench.PMFH)
  # MXF
  p.meanvar.bench.MXF <- plot.mean.sd(bench.RC[, groups == "MXF"], config$t.zero, config$t.poor, config$t.well, "Benchmark Data - MXF")
  print(p.meanvar.bench.MXF)
  
  ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_Test_MeanSD.pdf"), p.meanvar.test, width = 5, height=4, device="pdf")
  }
}

Correlation Plots

if(settings$generate.paper.figs) {
  num.genes.plot.pos <- 60
  num.genes.plot.neg <- 60

  # Co-expression models
  bench.model <- DANA.bench$data.model
  test.model <- DANA.test$data.model
  test.norm.model <- DANA.test$norm.models$test.TMM
  
  # Common control miRNAs
  pos.controls.bench <- rownames(bench.model$pos$corr)
  pos.controls.test <- rownames(test.model$pos$corr)
  pos.controls.common <- intersect(pos.controls.test, pos.controls.bench)
  neg.controls.bench <- rownames(bench.model$neg$corr)
  neg.controls.test <- rownames(test.model$neg$corr)

  # reduce the number of genes for the plot
  pos.controls.plot <- pos.controls.common[2:num.genes.plot.pos+1]
  
  num.genes.plot.neg <- min(num.genes.plot.neg,
                            dim(test.model$neg$corr)[1],
                            dim(bench.model$neg$corr)[1])
  
  # Subsetted correlation matrices
  corr.bench.pos <- bench.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.test.pos <- test.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.test.norm.pos <- test.norm.model$pos$corr[pos.controls.plot, pos.controls.plot]
  corr.bench.neg <- bench.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.neg <- test.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.norm.neg <- test.norm.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  
  ### Generate plots
  # Positive controls
  p.corr.pos.bench <- ggplot.corr(corr.bench.pos, 
                                  clusters=clusters, 
                                  threshold=0, 
                                  title="benchmark (un-normalized)")
  p.corr.pos.test <- ggplot.corr(corr.test.pos, 
                                 clusters=clusters, 
                                 threshold=0, 
                                 title="test (un-normalized)")
  p.corr.pos.test.norm <- ggplot.corr(corr.test.norm.pos, 
                                     clusters=clusters, 
                                     threshold=0, 
                                     title="test (TMM-normalized)")
  
  p.corr.pos <- arrangeGrob(p.corr.pos.bench + theme(legend.position="none"), 
                            p.corr.pos.test + theme(legend.position="none"),
                            get.legend(p.corr.pos.bench), 
                            widths = c(2,2,1))
  grid.arrange(p.corr.pos)  # display plot
  
  p.corr.pos.three <- arrangeGrob(p.corr.pos.bench + theme(legend.position="none"), 
                                  p.corr.pos.test.norm + theme(legend.position="none"),
                                  p.corr.pos.test + theme(legend.position="none"),
                                  get.legend(p.corr.pos.bench + theme(legend.position = "bottom")),
                                  layout_matrix=rbind(c(1,2,3), c(4,4,4)),
                                  heights = c(5,1))
  grid.arrange(p.corr.pos.three)  # display plot
  
  # Negative controls
  p.corr.neg.bench <- ggplot.corr(corr.bench.neg, 
                                  clusters=clusters, 
                                  threshold=0, 
                                  title="  benchmark (un-normalized)")
  p.corr.neg.test <- ggplot.corr(corr.test.neg, 
                                 clusters=clusters, 
                                 threshold=0, 
                                 title="  test (un-normalized)")
  p.corr.neg.test.norm <- ggplot.corr(corr.test.norm.neg, 
                                     clusters=clusters, 
                                     threshold=0, 
                                     title="  test (TMM)")
  
  p.corr.neg <- arrangeGrob(p.corr.neg.bench + theme(legend.position="none"), 
                            p.corr.neg.test + theme(legend.position="none"),
                            get.legend(p.corr.neg.bench),
                            widths = c(2,2,1))
  grid.arrange(p.corr.neg)  # display plot
  
  
  p.corr.neg.three <- arrangeGrob(p.corr.neg.bench + theme(legend.position="none"), 
                                  p.corr.neg.test.norm + theme(legend.position="none"),
                                  p.corr.neg.test + theme(legend.position="none"),
                                  get.legend(p.corr.neg.bench + theme(legend.position = "bottom")),
                                  layout_matrix=rbind(c(1,1,2,2,3,3), c(4,4,4,4,4,4)),
                                  heights = c(5,1))
  grid.arrange(p.corr.neg.three)  # display plot
  
  
  ## Export Plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrPos.pdf"), p.corr.pos, width = 8, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrNeg.pdf"), p.corr.neg, width = 8, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrPos_TMM.pdf"), p.corr.pos.three, width = 8, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrNeg_TMM.pdf"), p.corr.neg.three, width = 8, height=3.5, device="pdf")
  }
}
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

All normalization methods

if(settings$generate.paper.figs) { 
  p.corr.test <- ggplot.corr(DANA.test$data.model$pos$corr,
                             clusters=clusters,
                             title="un-normalized")
  p.corr.bench <- ggplot.corr(DANA.bench$data.model$pos$corr,
                              clusters=clusters,
                              title="benchmark data (un-normalized)")
  p.corr.TMM <- ggplot.corr(DANA.test$norm.models$test.TMM$pos$corr,
                            clusters=clusters,
                            title="TMM")
  p.corr.DESeq <- ggplot.corr(DANA.test$norm.models$test.DESeq$pos$corr,
                            clusters=clusters,
                            title="DESeq")
  p.corr.PoissonSeq <- ggplot.corr(DANA.test$norm.models$test.PoissonSeq$pos$corr,
                            clusters=clusters,
                            title="PoissonSeq")
  p.corr.TC <- ggplot.corr(DANA.test$norm.models$test.TC$pos$corr,
                            clusters=clusters,
                            title="TC")
  p.corr.Med <- ggplot.corr(DANA.test$norm.models$test.Med$pos$corr,
                            clusters=clusters,
                            title="Med")
  p.corr.RUVg <- ggplot.corr(DANA.test$norm.models$test.RUVg$pos$corr,
                            clusters=clusters,
                            title="RUVg")
  p.corr.RUVs <- ggplot.corr(DANA.test$norm.models$test.RUVs$pos$corr,
                            clusters=clusters,
                            title="RUVs")
  p.corr.RUVr <- ggplot.corr(DANA.test$norm.models$test.RUVr$pos$corr,
                            clusters=clusters,
                            title="RUVr")
  p.corr.UQ <- ggplot.corr(DANA.test$norm.models$test.UQ$pos$corr,
                            clusters=clusters,
                            title="UQ")
  p.corr.QN <- ggplot.corr(DANA.test$norm.models$test.QN$pos$corr,
                            clusters=clusters,
                            title="QN")
  
  # Arrange plots
  p.corr.test.all <-
    plot_grid(p.corr.test + theme(legend.position="none"),
              p.corr.TMM + theme(legend.position="none"),
              p.corr.DESeq + theme(legend.position="none"),
              p.corr.UQ + theme(legend.position="none"),
              p.corr.TC + theme(legend.position="none"),
              p.corr.Med + theme(legend.position="none"),
              p.corr.RUVg + theme(legend.position="none"),
              p.corr.RUVs + theme(legend.position="none"),
              p.corr.RUVr + theme(legend.position="none"),
              p.corr.PoissonSeq + theme(legend.position="none"),
              p.corr.QN + theme(legend.position="none"),
              get.legend(p.corr.test + theme(legend.position = "bottom")),
              ncol=3)
  plot(p.corr.test.all)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_Test_Corr_All.pdf"), p.corr.test.all, width=9, height=12, device="pdf")
  }
}
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

Correlation Frequency Plots

Subset for paper

if(settings$generate.paper.figs) {
  # Co-expression models
  bench.model <- DANA.bench$data.model
  bench.norm.model1 <- DANA.bench$norm.models$benchmark.RUVr
  bench.norm.model2 <- DANA.bench$norm.models$benchmark.QN
  test.model <- DANA.test$data.model
  test.norm.model1 <- DANA.test$norm.models$test.RUVr
  test.norm.model2 <- DANA.test$norm.models$test.QN
  
  # Set number of genes for negative correlation to minimum
  num.genes.plot.neg <- min(dim(test.model$neg$corr)[1],
                            dim(bench.model$neg$corr)[1],
                            dim(test.norm.model1$neg$corr)[1],
                            dim(test.norm.model2$neg$corr)[1],
                            dim(bench.norm.model1$neg$corr)[1],
                            dim(bench.norm.model2$neg$corr)[1])
  
  # Subsetted correlation matrices
  corr.bench.neg <- bench.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.bench.norm1.neg <- bench.norm.model1$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.bench.norm2.neg <- bench.norm.model2$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.neg <- test.model$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.norm1.neg <- test.norm.model1$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  corr.test.norm2.neg <- test.norm.model2$neg$corr[1:num.genes.plot.neg, 1:num.genes.plot.neg]
  
  # Reduce to upper diagonal part
  corr.test.neg <- corr.test.neg[upper.tri(corr.test.neg)]
  corr.test.norm1.neg <- corr.test.norm1.neg[upper.tri(corr.test.norm1.neg)]
  corr.test.norm2.neg <- corr.test.norm2.neg[upper.tri(corr.test.norm2.neg)]
  corr.bench.neg <- corr.bench.neg[upper.tri(corr.bench.neg)]
  corr.bench.norm1.neg <- corr.bench.norm1.neg[upper.tri(corr.bench.norm1.neg)]
  corr.bench.norm2.neg <- corr.bench.norm2.neg[upper.tri(corr.bench.norm2.neg)]
  
  
  ## direct comparison of negative controls benchmark vs test vs test.norm1
  neg.corr <- data.frame(
    control=factor(c(rep("Benchmark", length(corr.bench.neg)), 
                     rep("Test", length(corr.test.neg)),
                     rep("Test (RUVr)", length(corr.test.norm1.neg)))),
    corr=c(corr.bench.neg, corr.test.neg, corr.test.norm1.neg)
    )
  
  p.density.neg.freq <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_freqpoly(binwidth=.05, alpha=.8, size=.9) +
    theme_classic() +
    xlim(-1, 1) +
    scale_linetype_manual(values=c("solid", "solid", "twodash"))+
    scale_color_manual(values=c('#1B9E77','#D95F02','#D95F02')) + # First 2 colors from rcolorbrewer set "Dark2"
    theme(legend.position=c(0.85,0.9), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    ylab("Frequency") +
    xlab("Marginal correlation") +
    geom_vline(xintercept = 0, color="black", linetype="dashed")
  print(p.density.neg.freq)
  
  
  ## direct comparison of negative controls benchmark vs test vs test.norm1 vs test.norm2
  neg.corr <- data.frame(
    control=factor(c(rep("Benchmark", length(corr.bench.neg)), 
                     rep("Test", length(corr.test.neg)),
                     rep("Test (RUVr)", length(corr.test.norm1.neg)),
                     rep("Test (QN)", length(corr.test.norm2.neg)))),
    corr=c(corr.bench.neg, corr.test.neg, corr.test.norm1.neg, corr.test.norm2.neg)
    )
  
  p.density.neg.freq <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_freqpoly(binwidth=.075, alpha=.8, size=.9) +
    theme_classic() +
    xlim(-1, 1) +
    scale_color_manual(values=c('#e7298a','#1B9E77','#D95F02', '#7570B3')) + # First colors from rcolorbrewer set "Dark2"
    theme(legend.position=c(0.85,0.86), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    theme(legend.key.width = unit(1.25,"cm")) +
    ylab("Frequency") +
    xlab("Marginal correlation")  +
    geom_vline(xintercept = 0, color="black", linetype="dashed")
  print(p.density.neg.freq)
  
  
  
  # Additional Plot with normalized benchmark
  neg.corr <- data.frame(
    control=factor(c(rep("Benchmark", length(corr.bench.neg)), 
                     rep("Benchmark (RUVr)", length(corr.bench.norm1.neg)),
                     rep("Benchmark (QN)", length(corr.bench.norm2.neg)),
                     rep("Test", length(corr.test.neg)),
                     rep("Test (RUVr)", length(corr.test.norm1.neg)),
                     rep("Test (QN)", length(corr.test.norm2.neg))),
                   levels=c("Benchmark", "Benchmark (RUVr)", "Benchmark (QN)", "Test", 
                            "Test (RUVr)", "Test (QN)")),
    corr=c(corr.bench.neg, corr.bench.norm1.neg, corr.bench.norm2.neg, corr.test.neg, corr.test.norm1.neg, corr.test.norm2.neg)
    )
  pallette <- brewer.pal(n=6, name="Paired")[c(1,2,4,5,6,8)]
  pallette[c(3,6)] <- c("cyan2", "magenta3")
  pallette <- c("steelblue3", "dodgerblue4", "cyan2", "indianred1", "red3", "magenta3")
  p.density.neg.freq.paper <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control, size=control)) + 
    geom_vline(xintercept = 0, size=.3, color="gray85", linetype="solid") +
    geom_freqpoly(binwidth=.05, alpha=1) +
    theme_classic() +
    xlim(-1, 1) +
    theme(legend.position=c(0.22,0.8), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5)),
          legend.key.width = unit(1.25,"cm")) +
    ylab("Frequency") +
    xlab("Marginal correlation") +
    scale_size_manual(values = c(.8,.4,.4,.8,.4,.4)) +
    scale_linetype_manual(values=c("solid", "twodash", "dashed", "solid", "twodash", "dashed")) +
    scale_color_manual(values=pallette) 
    
  print(p.density.neg.freq.paper)
  
  
  
  if(settings$export.figures) {
    # ggsave(paste0(settings$paper.fig.path, "MSK_CorrDensityNeg_Hist.pdf"), p.density.neg.hist, width = 5, height=4, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrDensityNeg_Freq.pdf"), p.density.neg.freq, width = 5, height=4, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrDensityNeg_Freq_WithBench.pdf"), p.density.neg.freq.paper, width = 5, height=4, device="pdf")
  }
}
## Warning: Removed 6 row(s) containing missing values (geom_path).

## Warning: Removed 8 row(s) containing missing values (geom_path).

## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 12 row(s) containing missing values (geom_path).

All methods (for supplement)

if(settings$generate.paper.figs) {
  models <- list(
    "test" = DANA.test$data.model,
    "benchmark" = DANA.bench$data.model,
    "test (TMM)" = DANA.test$norm.models$test.TMM,
    "test (DESeq)" = DANA.test$norm.models$test.DESeq,
    "test (PoissonSeq)" = DANA.test$norm.models$test.PoissonSeq,
    "test (RUVg)" = DANA.test$norm.models$test.RUVg,
    "test (RUVs)" = DANA.test$norm.models$test.RUVs,
    "test (RUVr)" = DANA.test$norm.models$test.RUVr,
    "test (TC)" = DANA.test$norm.models$test.TC,
    "test (Med)" = DANA.test$norm.models$test.Med,
    "test (UQ)" = DANA.test$norm.models$test.UQ,
    "test (QN)" = DANA.test$norm.models$test.QN
  )
  
  # Set number of genes for negative correlation to minimum found
  num.genes.plot <- min(sapply(models, function(x) dim(x$neg$corr)[1]))
  
  # Subsetted correlation matrices
  corrs <- lapply(models, function(x) x$neg$corr[1:num.genes.plot, 1:num.genes.plot])
  corrs <- lapply(corrs, function(x) x[upper.tri(x)])
  
  control <- factor(
    as.vector(sapply(names(corrs), function(x) rep(x,length(corrs[[1]])))),
    levels = c("test","benchmark","test (TMM)","test (DESeq)","test (PoissonSeq)","test (RUVg)","test (RUVs)","test (RUVr)","test (TC)","test (Med)","test (UQ)", "test (QN)")
  )
  neg.corr <- data.frame(
    control=factor(control),
    corr=unname(unlist(corrs))
  )
  
  
  p.corr.frequency.neg.controls.all <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_vline(xintercept = 0, size=.4, color="gray80", linetype="solid") +
    geom_freqpoly(binwidth=.05, alpha=.9, size=.75) +
    theme_classic() +
    xlim(-1, 1) +
    scale_color_manual(values=brewer.pal(n=12, name="Paired")) + 
    theme(legend.position=c(0.85,0.8), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    ylab("Frequency") +
    xlab("Marginal correlation") + 
    theme(legend.key.width = unit(3,"cm"))
  print(p.corr.frequency.neg.controls.all)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrDensityNeg_Freq_TestAll.pdf"), p.corr.frequency.neg.controls.all, width = 8, height=8, device="pdf")
  }
}
## Warning: Removed 24 row(s) containing missing values (geom_path).

## Warning: Removed 24 row(s) containing missing values (geom_path).

if(settings$generate.paper.figs) {
  models <- list(
    "test" = DANA.test$data.model,
    "benchmark" = DANA.bench$data.model,
    "benchmark (TMM)" = DANA.bench$norm.models$benchmark.TMM,
    "benchmark (DESeq)" = DANA.bench$norm.models$benchmark.DESeq,
    "benchmark (PoissonSeq)" = DANA.bench$norm.models$benchmark.PoissonSeq,
    "benchmark (RUVg)" = DANA.bench$norm.models$benchmark.RUVg,
    "benchmark (RUVs)" = DANA.bench$norm.models$benchmark.RUVs,
    "benchmark (RUVr)" = DANA.bench$norm.models$benchmark.RUVr,
    "benchmark (TC)" = DANA.bench$norm.models$benchmark.TC,
    "benchmark (Med)" = DANA.bench$norm.models$benchmark.Med,
    "benchmark (UQ)" = DANA.bench$norm.models$benchmark.UQ,
    "benchmark (QN)" = DANA.bench$norm.models$benchmark.QN
  )
  
  # Set number of genes for negative correlation to minimum found
  num.genes.plot <- min(sapply(models, function(x) dim(x$neg$corr)[1]))
  
  # Subsetted correlation matrices
  corrs <- lapply(models, function(x) x$neg$corr[1:num.genes.plot, 1:num.genes.plot])
  corrs <- lapply(corrs, function(x) x[upper.tri(x)])
  
  control <- factor(
    as.vector(sapply(names(corrs), function(x) rep(x,length(corrs[[1]])))),
    levels = c("test","benchmark","benchmark (TMM)","benchmark (DESeq)","benchmark (PoissonSeq)","benchmark (RUVg)","benchmark (RUVs)","benchmark (RUVr)","benchmark (TC)","benchmark (Med)","benchmark (UQ)", "benchmark (QN)") 
  )
  neg.corr <- data.frame(
    control=factor(control),
    corr=unname(unlist(corrs))
  )
  
  # insert plot
  p.insert <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_vline(xintercept = 0, size=.4, color="gray80", linetype="solid") +
    geom_freqpoly(binwidth=.05, alpha=.9, size=.3) +
    theme_bw() +
    xlim(-1, 1) +
    scale_color_manual(values=brewer.pal(n=12, name="Paired")) +
    theme(legend.position="none",
          axis.title.x = element_blank(),
          axis.title.y = element_blank())
  
  
  p.corr.frequency.neg.controls.all <- ggplot(neg.corr, aes(x=corr, colour=control, linetype=control)) +
    geom_vline(xintercept = 0, size=.4, color="gray80", linetype="solid") +
    geom_freqpoly(binwidth=.05, alpha=.9, size=.75) +
    theme_classic() +
    xlim(-1, 1) +
    ylim(0,500) +
    scale_color_manual(values=brewer.pal(n=12, name="Paired")) +
    theme(legend.position=c(0.84,0.8), 
          legend.title=element_blank(),
          legend.background=element_rect(fill=alpha('white', 0.5))) +
    ylab("Frequency") +
    xlab("Marginal correlation") + 
    theme(legend.key.width = unit(2.5,"cm")) +
    annotation_custom(ggplotGrob(p.insert), xmin=-1.1, xmax=-0.3, ymin=250, ymax=500)
  print(p.corr.frequency.neg.controls.all)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_CorrDensityNeg_Freq_BenchmarkAll.pdf"), p.corr.frequency.neg.controls.all, width = 8, height=8, device="pdf")
  }
}
## Warning: Removed 24 row(s) containing missing values (geom_path).

## Warning: Removed 24 row(s) containing missing values (geom_path).

## Warning: Removed 24 row(s) containing missing values (geom_path).

Correlation Scatter Plots

All normalization methods for supplementary materials

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # un-normalized vs TMM
  p.scatter.test.TMM <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.TMM$pos$corr,
    clusters=clusters.test, title="TMM", xlab="un-normalized", ylab="TMM", 
    ccc=round(DANA.test$metrics["test.TMM", "cc"],2))
  # un-normalized vs DESeq
  p.scatter.test.DESeq <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.DESeq$pos$corr,
    clusters=clusters.test, title="DESeq", xlab="un-normalized", ylab="DESeq", 
    ccc=round(DANA.test$metrics["test.DESeq", "cc"],2))
  # un-normalized vs PoissonSeq
  p.scatter.test.PoissonSeq <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.PoissonSeq$pos$corr,
    clusters=clusters.test, title="PoissonSeq", xlab="un-normalized", ylab="PoissonSeq", 
    ccc=round(DANA.test$metrics["test.PoissonSeq", "cc"],2))
  # un-normalized vs TC
  p.scatter.test.TC <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.TC$pos$corr,
    clusters=clusters.test, title="TC", xlab="un-normalized", ylab="TC", 
    ccc=round(DANA.test$metrics["test.TC", "cc"],2))
  # un-normalized vs Med
  p.scatter.test.Med <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.Med$pos$corr,
    clusters=clusters.test, title="Med", xlab="un-normalized", ylab="Med", 
    ccc=round(DANA.test$metrics["test.Med", "cc"],2))
  # un-normalized vs RUVg
  p.scatter.test.RUVg <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVg$pos$corr,
    clusters=clusters.test, title="RUVg", xlab="un-normalized", ylab="RUVg", 
    ccc=round(DANA.test$metrics["test.RUVg", "cc"],2))
  # un-normalized vs RUVs
  p.scatter.test.RUVs <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVs$pos$corr,
    clusters=clusters.test, title="RUVs", xlab="un-normalized", ylab="RUVs", 
    ccc=round(DANA.test$metrics["test.RUVs", "cc"],2))
  # un-normalized vs RUVr
  p.scatter.test.RUVr <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVr$pos$corr,
    clusters=clusters.test, title="RUVr", xlab="un-normalized", ylab="RUVr", 
    ccc=round(DANA.test$metrics["test.RUVr", "cc"],2))
  # un-normalized vs UQ
  p.scatter.test.UQ <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.UQ$pos$corr,
    clusters=clusters.test, title="UQ", xlab="un-normalized", ylab="UQ", 
    ccc=round(DANA.test$metrics["test.UQ", "cc"],2))
  # un-normalized vs QN
  p.scatter.test.QN <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.QN$pos$corr,
    clusters=clusters.test, title="QN", xlab="un-normalized", ylab="QN", 
    ccc=round(DANA.test$metrics["test.QN", "cc"],2))
  
  p.scatter.test.all <- plot_grid(p.scatter.test.TMM,
                                  p.scatter.test.DESeq,
                                  p.scatter.test.PoissonSeq,
                                  p.scatter.test.TC,
                                  p.scatter.test.Med,
                                  p.scatter.test.RUVg,
                                  p.scatter.test.RUVs,
                                  p.scatter.test.RUVr,
                                  p.scatter.test.UQ,
                                  p.scatter.test.QN,
                                  ncol = 3,
                                  align="hv")
  plot(p.scatter.test.all)
  
  # Save plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_Scatter_All.pdf"), p.scatter.test.all, width=9, height=12, device="pdf")
  }
}
## Warning in plot.corr.scatter(corr1 = DANA.test$data.model$pos$corr, corr2 = DANA.test$norm.models$test.RUVs$pos$corr, : Warning: Shapes of correlation matrices in plot.corr.scatter do not match!
##             Reducing correlation matrices to all mutual genes.
## Warning in plot.corr.scatter(corr1 = DANA.test$data.model$pos$corr, corr2 = DANA.test$norm.models$test.QN$pos$corr, : Warning: Shapes of correlation matrices in plot.corr.scatter do not match!
##             Reducing correlation matrices to all mutual genes.

if(settings$generate.paper.figs) {
  par(mfrow=c(1,1))
  
  # un-normalized vs TMM
  p.scatter.benchmark.TMM <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.TMM$pos$corr,
    clusters=clusters.bench, title="TMM", xlab="un-normalized", ylab="TMM", 
    ccc=round(DANA.bench$metrics["benchmark.TMM", "cc"],2))
  # un-normalized vs DESeq
  p.scatter.benchmark.DESeq <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.DESeq$pos$corr,
    clusters=clusters.bench, title="DESeq", xlab="un-normalized", ylab="DESeq", 
    ccc=round(DANA.bench$metrics["benchmark.DESeq", "cc"],2))
  # un-normalized vs PoissonSeq
  p.scatter.benchmark.PoissonSeq <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.PoissonSeq$pos$corr,
    clusters=clusters.bench, title="PoissonSeq", xlab="un-normalized", ylab="PoissonSeq", 
    ccc=round(DANA.bench$metrics["benchmark.PoissonSeq", "cc"],2))
  # un-normalized vs TC
  p.scatter.benchmark.TC <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.TC$pos$corr,
    clusters=clusters.bench, title="TC", xlab="un-normalized", ylab="TC", 
    ccc=round(DANA.bench$metrics["benchmark.TC", "cc"],2))
  # un-normalized vs Med
  p.scatter.benchmark.Med <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.Med$pos$corr,
    clusters=clusters.bench, title="Med", xlab="un-normalized", ylab="Med", 
    ccc=round(DANA.bench$metrics["benchmark.Med", "cc"],2))
  # un-normalized vs RUVg
  p.scatter.benchmark.RUVg <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.RUVg$pos$corr,
    clusters=clusters.bench, title="RUVg", xlab="un-normalized", ylab="RUVg", 
    ccc=round(DANA.bench$metrics["benchmark.RUVg", "cc"],2))
  # un-normalized vs RUVs
  p.scatter.benchmark.RUVs <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.RUVs$pos$corr,
    clusters=clusters.bench, title="RUVs", xlab="un-normalized", ylab="RUVs", 
    ccc=round(DANA.bench$metrics["benchmark.RUVs", "cc"],2))
  # un-normalized vs RUVr
  p.scatter.benchmark.RUVr <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.RUVr$pos$corr,
    clusters=clusters.bench, title="RUVr", xlab="un-normalized", ylab="RUVr", 
    ccc=round(DANA.bench$metrics["benchmark.RUVr", "cc"],2))
  # un-normalized vs UQ
  p.scatter.benchmark.UQ <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.UQ$pos$corr,
    clusters=clusters.bench, title="UQ", xlab="un-normalized", ylab="UQ", 
    ccc=round(DANA.bench$metrics["benchmark.UQ", "cc"],2))
  # un-normalized vs QN
  p.scatter.benchmark.QN <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.QN$pos$corr,
    clusters=clusters.bench, title="QN", xlab="un-normalized", ylab="QN", 
    ccc=round(DANA.bench$metrics["benchmark.QN", "cc"],2))
  
  p.scatter.benchmark.all <- plot_grid(p.scatter.benchmark.TMM,
                                  p.scatter.benchmark.DESeq,
                                  p.scatter.benchmark.PoissonSeq,
                                  p.scatter.benchmark.TC,
                                  p.scatter.benchmark.Med,
                                  p.scatter.benchmark.RUVg,
                                  p.scatter.benchmark.RUVs,
                                  p.scatter.benchmark.RUVr,
                                  p.scatter.benchmark.UQ,
                                  p.scatter.benchmark.QN,
                                  ncol = 3,
                                  align="hv")
  plot(p.scatter.benchmark.all)
  
  # Save plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_Scatter_All_Bench.pdf"), p.scatter.benchmark.all, width=9, height=12, device="pdf")
  }
}
## Warning in plot.corr.scatter(corr1 = DANA.bench$data.model$pos$corr, corr2 = DANA.bench$norm.models$benchmark.RUVs$pos$corr, : Warning: Shapes of correlation matrices in plot.corr.scatter do not match!
##             Reducing correlation matrices to all mutual genes.

Subset for paper

if(settings$generate.paper.figs) {
  ## Generate correlation scatter plots for positive controls for TMM and UQ
  par(mfrow=c(1,1))
  
  # un-normalized vs TMM
  p.scatter.test.TMM <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.TMM$pos$corr,
    clusters=clusters.test,
    title="",
    xlab="un-normalized", 
    ylab="TMM", 
    threshold=0,
    ccc=round(DANA.test$metrics["test.TMM", "cc"],3))
  print(p.scatter.test.TMM)
  # un-normalized vs UQ
  p.scatter.test.UQ <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.UQ$pos$corr,
    clusters=clusters.test,
    title="",
    xlab="un-normalized", 
    ylab="UQ", 
    threshold=0,
    ccc=round(DANA.test$metrics["test.UQ", "cc"],3))
  print(p.scatter.test.UQ)
  # un-normalized vs RUVr
  p.scatter.test.RUVr <- plot.corr.scatter(
    corr1=DANA.test$data.model$pos$corr, 
    corr2=DANA.test$norm.models$test.RUVr$pos$corr,
    clusters=clusters.test,
    title="",
    xlab="un-normalized", 
    ylab="RUVr", 
    threshold=0,
    ccc=round(DANA.test$metrics["test.RUVr", "cc"],3))
  print(p.scatter.test.UQ)
  
  # Save plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_ScatterTMM.pdf"), p.scatter.test.TMM, width=4, height=4, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_ScatterUQ.pdf"), p.scatter.test.UQ, width=4, height=4, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_ScatterRUVr.pdf"), p.scatter.test.RUVr, width=4, height=4, device="pdf")
  }
}

if(settings$generate.paper.figs) {
  ## Generate correlation scatter plots for positive controls for TMM and UQ
  par(mfrow=c(1,1))
  
  # un-normalized vs TMM
  p.scatter.benchmark.TMM <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.TMM$pos$corr,
    clusters=clusters.bench,
    title="TMM vs. un-normalized",
    xlab="un-normalized", 
    ylab="TMM", 
    threshold=0,
    ccc=round(DANA.bench$metrics["benchmark.TMM", "cc"],3))
  print(p.scatter.benchmark.TMM)
  # un-normalized vs UQ
  p.scatter.benchmark.UQ <- plot.corr.scatter(
    corr1=DANA.bench$data.model$pos$corr, 
    corr2=DANA.bench$norm.models$benchmark.UQ$pos$corr,
    clusters=clusters.bench,
    title="UQ vs. un-normalized",
    xlab="un-normalized", 
    ylab="UQ", 
    threshold=0,
    ccc=round(DANA.bench$metrics["benchmark.UQ", "cc"],3))
  print(p.scatter.benchmark.UQ)
  
  # Save plots
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_ScatterTMM_benchmark.pdf"), p.scatter.benchmark.TMM, width=4, height=4, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_ScatterUQ_benchmark.pdf"), p.scatter.benchmark.UQ, width=4, height=4, device="pdf")
  }
}

DANA Result Metrics

if(settings$generate.paper.figs) { 
  options(scipen=4) # force non-scientific notation of x axis
  
  
  test.DANA.metrics <- data.frame(
    method=substr(rownames(DANA.test$metrics), 6, 20),
    cc=DANA.test$metrics[, "cc"],
    mscr=DANA.test$metrics[, "mscr"]
  )
  
  p.metrics.test <- ggplot(test.DANA.metrics,aes(x=mscr,y=cc, label=method)) + 
    geom_point(alpha=.5) +
    theme_classic() + 
    xlab(TeX("$mscr^-$; Relative reduction of handling effects")) + 
    ylab(TeX("$cc^+$; Biological signal preservation")) + 
    geom_text_repel(aes(label = method), size=3) +
    # xlim(c(min(c(0, test.mposc.reduction)),max(test.mposc.reduction))) +
    scale_x_continuous(labels = scales::percent, limits=c(0,1.05), 
                       breaks = scales::pretty_breaks(n = 5)) +
    # ylim(c(0,1)) 
    scale_y_continuous(labels = scales::percent, limits = c(0.5,1))
  print(p.metrics.test)
  
  print("DANA Statistics for test data")
  test.DANA.metrics$cc <- round(test.DANA.metrics$cc,3)
  test.DANA.metrics$mscr <- round(test.DANA.metrics$mscr,3)
  print(test.DANA.metrics)
  
  
  bench.DANA.metrics <- data.frame(
    method=substr(rownames(DANA.bench$metrics), 11, 30),
    cc=DANA.bench$metrics[, "cc"],
    mscr=DANA.bench$metrics[, "mscr"]
  )
  
  p.metrics.bench <- ggplot(bench.DANA.metrics, aes(x=mscr, y=cc, label=method)) + 
    geom_point(alpha=.5) +
    theme_classic() + 
    xlab(TeX("$mscr^-$; Relative reduction of handling effects")) + 
    ylab(TeX("$cc^+$; Biological signal preservation")) + 
    geom_text_repel(aes(label = method), size=3, max.overlaps = Inf) +
    # xlim(c(min(c(0,bench.mposc.reduction)),max(bench.mposc.reduction))) +
    scale_x_continuous(labels = scales::percent, limits=c(0, 1.05),
                       breaks = scales::pretty_breaks(n = 5)) +
    # ylim(c(0,1))
    scale_y_continuous(labels = scales::percent, limits = c(0, 1)) 
  print(p.metrics.bench)
  print("DANA Statistics for benchmark data")
  bench.DANA.metrics$cc <- round(bench.DANA.metrics$cc,3)
  bench.DANA.metrics$mscr <- round(bench.DANA.metrics$mscr,3)
  print(bench.DANA.metrics)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_MetricsTest.pdf"), p.metrics.test, width=4.5, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_MetricsBench.pdf"), p.metrics.bench, width=5.5, height=4.5, device="pdf")
  }
  
}

## [1] "DANA Statistics for test data"
##        method    cc  mscr
## 1         TMM 0.955 0.433
## 2       DESeq 0.952 0.444
## 3  PoissonSeq 0.960 0.341
## 4          TC 0.961 0.437
## 5         Med 0.682 0.341
## 6        RUVg 0.960 0.479
## 7        RUVs 0.920 0.342
## 8        RUVr 0.812 0.725
## 9          UQ 0.701 0.344
## 10         QN 0.938 0.259
## [1] "DANA Statistics for benchmark data"
##        method    cc  mscr
## 1         TMM 0.961 0.635
## 2       DESeq 0.961 0.628
## 3  PoissonSeq 0.963 0.645
## 4          TC 0.955 0.538
## 5         Med 0.904 0.552
## 6        RUVg 0.980 0.518
## 7        RUVs 0.840 0.505
## 8        RUVr 0.937 0.635
## 9          UQ 0.915 0.569
## 10         QN 0.922 0.630

DEA Results

if(settings$generate.paper.figs && settings$perform.DEA) { 
  # FNR-FDR Plot
  test.DEA.res <- data.frame(method=substr(names(test.norm.DEA.stats), 6, 20),
                             FDR=-sapply(test.norm.DEA.stats, function(x) x$FDR)+1,
                             FNR=-sapply(test.norm.DEA.stats, function(x) x$FNR)+1)
  p.test.DEA.FNR.FDR <- ggplot(test.DEA.res, aes(x=FNR, y=FDR, label=method)) +
      theme_classic() + 
      geom_point(alpha=1) + 
      xlab("Sensitivity") + # True positive rate(1-FNR)
      ylab("Positive predictive value") + # Positive predictive value (1-FDR)
      geom_text_repel(aes(label = method), size=3) +
      scale_x_continuous(labels = scales::percent, limits=c(0,1),
                         breaks = scales::pretty_breaks(n = 4)) +
      scale_y_continuous(labels = scales::percent, limits = c(0,1))
  print(p.test.DEA.FNR.FDR)
  
  # CAT Plot
  d <- append(list("test.No Norm"=test.DEA), test.norm.DEA)
  names(d) <- substr(names(d), 6, 20)
  p.test.DEA.CAT.1 <- plot.CAT(DEA=d[1:7], 
                             truth.DEA=bench.DEA, 
                             title="Significance ranks of miRNAs",
                             maxrank=100)
  print(p.test.DEA.CAT.1)
  p.test.DEA.CAT.2 <- plot.CAT(DEA=d[c(1,8:11)], 
                             truth.DEA=bench.DEA, 
                             title="Significance ranks of miRNAs",
                             maxrank=100)
  print(p.test.DEA.CAT.2)
  rm(d)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_DEA_FNR_FDR.pdf"), p.test.DEA.FNR.FDR, width=4.5, height=3.5, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_DEA_CAT.pdf"), p.test.DEA.CAT, width=4.5, height=3.5, device="pdf")
  }
}

DANA results for different choices of control cutoffs

if(settings$generate.paper.figs && settings$compare.cutoffs) { 
  # Mean sd plots
  p.meanvar.conf1 <- plot.mean.sd(test.RC, cutoff.conf1$t.zero, 
                                  cutoff.conf1$t.poor, cutoff.conf1$t.well) 
  p.meanvar.conf2 <- plot.mean.sd(test.RC, cutoff.conf2$t.zero, 
                                  cutoff.conf2$t.poor, cutoff.conf2$t.well)
  p.meanvar.conf3 <- plot.mean.sd(test.RC, cutoff.conf3$t.zero, 
                                  cutoff.conf3$t.poor, cutoff.conf3$t.well)
  
  # RC histograms
  p.RC.conf1 <- plot.countHist(test.RC, binwidth = 0.1, cutoff.conf1$t.zero, 
                               cutoff.conf1$t.poor, cutoff.conf1$t.well) +
    annotate("label", alpha=0.5, x=10, y=75, hjust=0, vjust=0, size=2.75,
             label=paste0(" neg. control: [",cutoff.conf1$t.zero ,", ",cutoff.conf1$t.poor, "] \n pos. control: [",cutoff.conf1$t.well ,", inf] "))
  p.RC.conf2 <- plot.countHist(test.RC, binwidth = 0.1, cutoff.conf2$t.zero, 
                               cutoff.conf2$t.poor, cutoff.conf2$t.well) +
    annotate("label", alpha=0.5, x=10, y=75, hjust=0, vjust=0, size=2.75,
             label=paste0(" neg. control: [",cutoff.conf2$t.zero ,", ",cutoff.conf2$t.poor, "] \n pos. control: [",cutoff.conf2$t.well ,", inf] "))
  p.RC.conf3 <- plot.countHist(test.RC, binwidth = 0.1, cutoff.conf3$t.zero, 
                               cutoff.conf3$t.poor, cutoff.conf3$t.well) +
    annotate("label", alpha=0.5, x=10, y=75, hjust=0, vjust=0, size=2.75,
             label=paste0(" neg. control: [",cutoff.conf3$t.zero ,", ",cutoff.conf3$t.poor, "] \n pos. control: [",cutoff.conf3$t.well ,", inf] "))
  
  # DANA result metrics
  p.result.DANA.1 <- plot.DANA.metrics(DANA.1$metrics, label.repel = TRUE, label.size=2.5)
  p.result.DANA.2 <- plot.DANA.metrics(DANA.2$metrics, label.repel = TRUE, label.size=2.5)
  p.result.DANA.3 <- plot.DANA.metrics(DANA.3$metrics, label.repel = TRUE, label.size=2.5)
  
  p.cutoffs <-
    plot_grid(p.RC.conf2,
              p.RC.conf1 + labs(x="Mean log2(read count)", y=element_blank()),
              p.RC.conf3 + labs(x="Mean log2(read count)", y=element_blank()),
              p.meanvar.conf2,
              p.meanvar.conf1 + labs(x="Mean log2(read count)", y=element_blank()),
              p.meanvar.conf3 + labs(x="Mean log2(read count)", y=element_blank()),
              p.result.DANA.2,
              p.result.DANA.1 + labs(y=element_blank()),
              p.result.DANA.3 + labs(y=element_blank()),
              align="hv",
              labels=c("a","b","c"))
  print(p.cutoffs)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_Test_Cutoff_Comparison.pdf"), p.cutoffs, width=10, height=10, device="pdf")
  }
}
## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

## Warning: Removed 1 rows containing missing values (geom_bar).

DANA Results for different partial correlation estimation methods

if(settings$generate.paper.figs && settings$compare.corr.methods) { 
  methods.name <- c("MB (BIC)", "MB (AIC)", "MB (CV)","MB (AV)", "MB (RIC)", "MB (StARS)", "glasso (RIC)", "glasso (StARS)", "FastGGM", "Pearson", "Spearman")
  
  ## Between method correlation
  # Compute pearson correlation between cc+ for different methods
  compare.test.cc <- cbind(DANA.test.mb.bic.res$metrics$cc,
                           DANA.test.mb.aic.res$metrics$cc,
                           DANA.test.mb.cv.res$metrics$cc,
                           DANA.test.mb.av.res$metrics$cc,
                           DANA.test.huge.mb.ric.res$metrics$cc,
                           DANA.test.huge.mb.stars.res$metrics$cc,
                           DANA.test.glasso.ric.res$metrics$cc,
                           DANA.test.glasso.stars.res$metrics$cc,
                           DANA.test.fastggm.res$metrics$cc,
                           DANA.test.pearson.res$metrics$cc,
                           DANA.test.spearman.res$metrics$cc)
  rownames(compare.test.cc) <- rownames(DANA.test.mb.bic.res$metrics)
  colnames(compare.test.cc) <- methods.name
  p.pcorr.cc.corr <- corrplot.mixed(cor(compare.test.cc),lower="circle",upper="number")
  p.pcorr.cc.corr <- ggcorrplot(cor(compare.test.cc), type="upper", lab=TRUE)
  plot(p.pcorr.cc.corr)
  
  
  
  ## Display result statistics plot
  # MB (bic)
  p.test.mb.bic.stats <- plot.DANA.metrics(DANA.test.mb.bic.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("MB (BIC)") + theme(plot.title = element_text(size=12))
  # MB (aic)
  p.test.mb.aic.stats <- plot.DANA.metrics(DANA.test.mb.aic.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("MB (AIC)") + theme(plot.title = element_text(size=12))
  # MB (cv)
  p.test.mb.cv.stats <- plot.DANA.metrics(DANA.test.mb.cv.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("MB (CV)") + theme(plot.title = element_text(size=12))
  # MB (av)
  p.test.mb.av.stats <- plot.DANA.metrics(DANA.test.mb.av.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("MB (AV)") + theme(plot.title = element_text(size=12))
  # huge.mb (ric)
  p.test.huge.mb.ric.stats <- plot.DANA.metrics(DANA.test.huge.mb.ric.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("MB (RIC)") + theme(plot.title = element_text(size=12))
  # huge.mb (stars)
  p.test.huge.mb.stars.stats <- plot.DANA.metrics(DANA.test.huge.mb.stars.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("MB (StARS)") + theme(plot.title = element_text(size=12))
  # glasso (ric)
  p.test.glasso.ric.stats <- plot.DANA.metrics(DANA.test.glasso.ric.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("glasso (RIC)") + theme(plot.title = element_text(size=12))
  # glasso (stars)
  p.test.glasso.stars.stats <- plot.DANA.metrics(DANA.test.glasso.stars.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("glasso (StARS)") + theme(plot.title = element_text(size=12))
  # FastGGM
  p.test.fastggm.stats <- plot.DANA.metrics(DANA.test.fastggm.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("FastGGM") + theme(plot.title = element_text(size=12))
  # Pearson
  p.test.pearson.stats <- plot.DANA.metrics(DANA.test.pearson.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("Pearson") + theme(plot.title = element_text(size=12))
  # Spearman
  p.test.spearman.stats <- plot.DANA.metrics(DANA.test.spearman.res$metrics, label.repel=TRUE, label.size = 2.5) + ggtitle("Spearman") + theme(plot.title = element_text(size=12))
  
  # Plot
  p.partialCorr.compare.metrics <-
    plot_grid(p.test.mb.bic.stats + labs(x="mscr-", y="cc+"),
              p.test.mb.aic.stats + labs(x="mscr-", y=element_blank()),
              p.test.mb.cv.stats + labs(x="mscr-", y=element_blank()),
              p.test.mb.av.stats + labs(x="mscr-", y="cc+"),
              p.test.huge.mb.ric.stats + labs(x="mscr-", y=element_blank()),
              p.test.huge.mb.stars.stats + labs(x="mscr-", y=element_blank()),
              p.test.glasso.ric.stats + labs(x="mscr-", y="cc+"),
              p.test.glasso.stars.stats + labs(x="mscr-", y=element_blank()),
              p.test.fastggm.stats + labs(x="mscr-", y=element_blank()),
              p.test.pearson.stats + labs(x="mscr-", y="cc+"),
              p.test.spearman.stats + labs(x="mscr-", y=element_blank()),
              ncol=3,
              align="hv")
  plot(p.partialCorr.compare.metrics)
  
  ## Correlation Plots
  p.corr.mb.bic <- ggplot.corr(DANA.test.mb.bic.res$data.model$pos$corr, clusters, title="MB (BIC)")
  p.corr.mb.aic <- ggplot.corr(DANA.test.mb.aic.res$data.model$pos$corr, clusters, title="MB (AIC)")
  p.corr.mb.cv <- ggplot.corr(DANA.test.mb.cv.res$data.model$pos$corr, clusters, title="MB (CV)")
  p.corr.mb.av <- ggplot.corr(DANA.test.mb.av.res$data.model$pos$corr, clusters, title="MB (AV)")
  p.corr.huge.mb.ric <- ggplot.corr(DANA.test.huge.mb.ric.res$data.model$pos$corr, clusters, title="MB (RIC)")
  p.corr.huge.mb.stars <- ggplot.corr(DANA.test.huge.mb.stars.res$data.model$pos$corr, clusters, title="MB (StARS)")
  p.corr.glasso.ric <- ggplot.corr(DANA.test.glasso.ric.res$data.model$pos$corr, clusters, title="glasso (RIC)")
  p.corr.glasso.stars <- ggplot.corr(DANA.test.glasso.stars.res$data.model$pos$corr, clusters, title="glasso (StARS)")  
  p.corr.fastggm <- ggplot.corr(DANA.test.fastggm.res$data.model$pos$corr, clusters, title="FastGGM")
  p.corr.pearson <- ggplot.corr(DANA.test.pearson.res$data.model$pos$corr, clusters, title="Pearson")
  p.corr.spearman <- ggplot.corr(DANA.test.spearman.res$data.model$pos$corr, clusters, title="Spearman")
  
  # Plot
  p.partialCorr.compare.pcorrs <-
    plot_grid(p.corr.mb.bic + theme(legend.position="none"),
              p.corr.mb.aic + theme(legend.position="none"),
              p.corr.mb.cv + theme(legend.position="none"),
              p.corr.mb.av + theme(legend.position="none"),
              p.corr.huge.mb.ric + theme(legend.position="none"),
              p.corr.huge.mb.stars + theme(legend.position="none"),
              p.corr.glasso.ric + theme(legend.position="none"),
              p.corr.glasso.stars + theme(legend.position="none"),
              p.corr.fastggm + theme(legend.position="none"),
              p.corr.pearson + theme(legend.position="none"),
              p.corr.spearman + theme(legend.position="none"),
              get.legend(p.corr.mb.bic + theme(legend.position = "bottom")),
              ncol=3)
  plot(p.partialCorr.compare.pcorrs)
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_PCorr_Comparison_Metrics.pdf"), p.partialCorr.compare.metrics, width=9, height=12, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_PCorr_Comparison_CC_Corr.pdf"), p.pcorr.cc.corr, width=7, height=7, device="pdf")
    ggsave(paste0(settings$paper.fig.path, "MSK_PCorr_Corr_All.pdf"), p.partialCorr.compare.pcorrs, width=9, height=12, device="pdf")
  }
}

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

Arrange plots for paper

if(settings$generate.paper.figs) { 
  top.row <-
    plot_grid(NULL,
              p.corr.pos.three,
              NULL,
              labels=c("", ""),
              rel_heights = c(0.5 ,10, 0.2),
              ncol=1)
  bottom.row <- 
    plot_grid(NULL,
              p.scatter.test.TMM,
              p.scatter.test.RUVr,
              NULL,
              align="h",
              labels=c("", "b", "", ""),
              vjust = 0,
              rel_widths = c(1,2.5,2.5,1),
              nrow=1)
  p.results.paper <- 
    plot_grid(top.row, 
              bottom.row,
              # align="h",
              labels=c("a", ""),
              rel_heights = c(5,3.5),
              ncol=1)
  plot(p.results.paper)
  
  
  if(settings$export.figures) {
    ggsave(paste0(settings$paper.fig.path, "MSK_posCorr_results.pdf"), p.results.paper, width=8, height=6.5, device="pdf")
  }
}

Package Information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] de_DE.UTF-8/de_DE.UTF-8/de_DE.UTF-8/C/de_DE.UTF-8/de_DE.UTF-8
## 
## attached base packages:
## [1] splines   stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggcorrplot_0.1.3            latex2exp_0.5.0            
##  [3] RColorBrewer_1.1-2          cowplot_1.1.1              
##  [5] openxlsx_4.2.4              ffpe_1.38.0                
##  [7] TTR_0.24.2                  DescTools_0.99.44          
##  [9] vsn_3.62.0                  RUVSeq_1.28.0              
## [11] EDASeq_2.28.0               ShortRead_1.52.0           
## [13] GenomicAlignments_1.30.0    Rsamtools_2.10.0           
## [15] Biostrings_2.62.0           XVector_0.34.0             
## [17] sva_3.42.0                  BiocParallel_1.28.2        
## [19] genefilter_1.76.0           mgcv_1.8-38                
## [21] nlme_3.1-153                PoissonSeq_1.1.2           
## [23] combinat_0.0-8              DESeq2_1.34.0              
## [25] SummarizedExperiment_1.24.0 Biobase_2.54.0             
## [27] MatrixGenerics_1.6.0        matrixStats_0.61.0         
## [29] GenomicRanges_1.46.1        GenomeInfoDb_1.30.0        
## [31] IRanges_2.28.0              S4Vectors_0.32.3           
## [33] BiocGenerics_0.40.0         edgeR_3.36.0               
## [35] limma_3.50.0                FastGGM_1.0                
## [37] RcppParallel_5.1.4          Rcpp_1.0.7                 
## [39] huge_1.3.5                  glmnet_4.1-3               
## [41] Matrix_1.3-4                ggrepel_0.9.1              
## [43] plotly_4.10.0               stargazer_5.2.2            
## [45] corrplot_0.92               ggnewscale_0.4.5           
## [47] gridExtra_2.3               ggplot2_3.3.5              
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                R.utils_2.11.0           
##   [3] tidyselect_1.1.1          RSQLite_2.2.9            
##   [5] AnnotationDbi_1.56.2      htmlwidgets_1.5.4        
##   [7] grid_4.1.2                munsell_0.5.0            
##   [9] codetools_0.2-18          preprocessCore_1.56.0    
##  [11] nleqslv_3.3.2             withr_2.4.3              
##  [13] colorspace_2.0-2          filelock_1.0.2           
##  [15] highr_0.9                 knitr_1.36               
##  [17] rstudioapi_0.13           labeling_0.4.2           
##  [19] GenomeInfoDbData_1.2.7    hwriter_1.3.2            
##  [21] farver_2.1.0              bit64_4.0.5              
##  [23] rhdf5_2.38.0              vctrs_0.3.8              
##  [25] generics_0.1.1            xfun_0.28                
##  [27] BiocFileCache_2.2.0       R6_2.5.1                 
##  [29] illuminaio_0.36.0         locfit_1.5-9.4           
##  [31] reshape_0.8.8             bitops_1.0-7             
##  [33] rhdf5filters_1.6.0        cachem_1.0.6             
##  [35] DelayedArray_0.20.0       assertthat_0.2.1         
##  [37] BiocIO_1.4.0              scales_1.1.1             
##  [39] rootSolve_1.8.2.3         gtable_0.3.0             
##  [41] affy_1.72.0               methylumi_2.40.1         
##  [43] lmom_2.8                  rlang_0.4.12             
##  [45] rtracklayer_1.54.0        lazyeval_0.2.2           
##  [47] GEOquery_2.62.1           reshape2_1.4.4           
##  [49] BiocManager_1.30.16       yaml_2.2.1               
##  [51] crosstalk_1.2.0           GenomicFeatures_1.46.1   
##  [53] tools_4.1.2               nor1mix_1.3-0            
##  [55] affyio_1.64.0             ellipsis_0.3.2           
##  [57] lumi_2.46.0               jquerylib_0.1.4          
##  [59] siggenes_1.68.0           proxy_0.4-26             
##  [61] plyr_1.8.6                sparseMatrixStats_1.6.0  
##  [63] progress_1.2.2            zlibbioc_1.40.0          
##  [65] purrr_0.3.4               RCurl_1.98-1.5           
##  [67] prettyunits_1.1.1         openssl_1.4.5            
##  [69] bumphunter_1.36.0         zoo_1.8-9                
##  [71] sfsmisc_1.1-12            magrittr_2.0.1           
##  [73] data.table_1.14.2         mvtnorm_1.1-3            
##  [75] aroma.light_3.24.0        hms_1.1.1                
##  [77] evaluate_0.14             xtable_1.8-4             
##  [79] XML_3.99-0.8              jpeg_0.1-9               
##  [81] mclust_5.4.8              shape_1.4.6              
##  [83] compiler_4.1.2            biomaRt_2.50.1           
##  [85] minfi_1.40.0              tibble_3.1.6             
##  [87] KernSmooth_2.23-20        crayon_1.4.2             
##  [89] R.oo_1.24.0               htmltools_0.5.2          
##  [91] tzdb_0.2.0                tidyr_1.1.4              
##  [93] geneplotter_1.72.0        expm_0.999-6             
##  [95] Exact_3.1                 DBI_1.1.1                
##  [97] dbplyr_2.1.1              MASS_7.3-54              
##  [99] rappdirs_0.3.3            boot_1.3-28              
## [101] readr_2.1.1               quadprog_1.5-8           
## [103] R.methodsS3_1.8.1         parallel_4.1.2           
## [105] igraph_1.2.9              pkgconfig_2.0.3          
## [107] xml2_1.3.3                foreach_1.5.1            
## [109] annotate_1.72.0           rngtools_1.5.2           
## [111] multtest_2.50.0           beanplot_1.2             
## [113] doRNG_1.8.2               scrime_1.3.5             
## [115] stringr_1.4.0             digest_0.6.29            
## [117] base64_2.0                rmarkdown_2.11           
## [119] gld_2.6.3                 DelayedMatrixStats_1.16.0
## [121] restfulr_0.0.13           curl_4.3.2               
## [123] rjson_0.2.20              lifecycle_1.0.1          
## [125] jsonlite_1.7.2            Rhdf5lib_1.16.0          
## [127] askpass_1.1               viridisLite_0.4.0        
## [129] fansi_0.5.0               pillar_1.6.4             
## [131] lattice_0.20-45           KEGGREST_1.34.0          
## [133] fastmap_1.1.0             httr_1.4.2               
## [135] survival_3.2-13           glue_1.5.1               
## [137] xts_0.12.1                zip_2.2.0                
## [139] png_0.1-7                 iterators_1.0.13         
## [141] bit_4.0.4                 class_7.3-19             
## [143] stringi_1.7.6             HDF5Array_1.22.1         
## [145] blob_1.2.2                latticeExtra_0.6-29      
## [147] memoise_2.0.1             dplyr_1.0.7              
## [149] e1071_1.7-9